Diffstat (limited to 'lib/Target/X86')
-rw-r--r--  lib/Target/X86/X86.h                       |   5
-rw-r--r--  lib/Target/X86/X86COFFMachineModuleInfo.h  |   1
-rw-r--r--  lib/Target/X86/X86CallingConv.td           |   9
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp        |   2
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp         | 190
-rw-r--r--  lib/Target/X86/X86ISelLowering.h           |   8
-rw-r--r--  lib/Target/X86/X86Instr64bit.td            |   9
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp            | 305
-rw-r--r--  lib/Target/X86/X86InstrInfo.h              |  25
-rw-r--r--  lib/Target/X86/X86InstrInfo.td             |  24
-rw-r--r--  lib/Target/X86/X86InstrSSE.td              |   2
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp         |  68
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp            |   2
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp        |   2
14 files changed, 418 insertions, 234 deletions
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index a167118..684c61f 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -62,11 +62,6 @@ MCCodeEmitter *createX86MCCodeEmitter(const Target &, TargetMachine &TM);
 ///
 FunctionPass *createEmitX86CodeToMemory();
 
-/// createX86MaxStackAlignmentCalculatorPass - This function returns a pass
-/// which calculates maximal stack alignment required for function
-///
-FunctionPass *createX86MaxStackAlignmentCalculatorPass();
-
 extern Target TheX86_32Target, TheX86_64Target;
 
 } // End llvm namespace
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index afd5525..5017af2 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -16,6 +16,7 @@
 
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/ADT/StringSet.h"
+#include "X86MachineFunctionInfo.h"
 
 namespace llvm {
   class X86MachineFunctionInfo;
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index d77f039..12d3d04 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -64,11 +64,18 @@ def RetCC_X86_32_C : CallingConv<[
 // X86-32 FastCC return-value convention.
 def RetCC_X86_32_Fast : CallingConv<[
   // The X86-32 fastcc returns 1, 2, or 3 FP values in XMM0-2 if the target has
-  // SSE2, otherwise it is the the C calling conventions.
+  // SSE2.
   // This can happen when a float, 2 x float, or 3 x float vector is split by
   // target lowering, and is returned in 1-3 sse regs.
   CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
   CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+
+  // For integers, ECX can be used as an extra return register
+  CCIfType<[i8],  CCAssignToReg<[AL, DL, CL]>>,
+  CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>,
+  CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>,
+
+  // Otherwise, it is the same as the common X86 calling convention.
   CCDelegateTo<RetCC_X86Common>
 ]>;
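
A standalone model of the register-assignment order the RetCC_X86_32_Fast hunk above encodes. This is illustrative C++ only, not part of the commit, and the helper name is invented:

    #include <cassert>
    #include <string>
    #include <vector>

    // i32 results take EAX, then EDX, then ECX, mirroring
    //   CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>
    // above; i8/i16 use the AL/DL/CL and AX/DX/CX subregisters.
    std::vector<std::string> assignFastccIntReturns(unsigned NumI32Results) {
      static const char *const Order[3] = {"EAX", "EDX", "ECX"};
      assert(NumI32Results <= 3 && "a fourth result no longer fits in registers");
      std::vector<std::string> Regs;
      for (unsigned i = 0; i != NumI32Results; ++i)
        Regs.push_back(Order[i]);
      return Regs;
    }
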
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index a2fe9b0..044bd4b 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -289,7 +289,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
         while (Start != BB.begin() && prior(Start) != PrevI) --Start;
         errs() << "Inserted instructions:\n\t";
         Start->print(errs(), &MF.getTarget());
-        while (++Start != next(I)) {}
+        while (++Start != llvm::next(I)) {}
       }
       dumpStack();
     );
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d80b8ec..0517b56 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -595,6 +595,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,Expand);
+    setOperationAction(ISD::TRUNCATE, (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand);
+    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+      setTruncStoreAction((MVT::SimpleValueType)VT,
+                          (MVT::SimpleValueType)InnerVT, Expand);
+    setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+    setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+    setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
   }
 
   // FIXME: In order to prevent SSE instructions being expanded to MMX ones
@@ -671,8 +683,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom);
 
-    setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand);
-    setOperationAction(ISD::TRUNCATE, MVT::v8i8, Expand);
     setOperationAction(ISD::SELECT, MVT::v8i8, Promote);
     setOperationAction(ISD::SELECT, MVT::v4i16, Promote);
     setOperationAction(ISD::SELECT, MVT::v2i32, Promote);
@@ -3344,6 +3354,82 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
 }
 
 SDValue
+X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+                                          SelectionDAG &DAG) {
+
+  // Check if the scalar load can be widened into a vector load. And if
+  // the address is "base + cst" see if the cst can be "absorbed" into
+  // the shuffle mask.
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
+    SDValue Ptr = LD->getBasePtr();
+    if (!ISD::isNormalLoad(LD) || LD->isVolatile())
+      return SDValue();
+    EVT PVT = LD->getValueType(0);
+    if (PVT != MVT::i32 && PVT != MVT::f32)
+      return SDValue();
+
+    int FI = -1;
+    int64_t Offset = 0;
+    if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
+      FI = FINode->getIndex();
+      Offset = 0;
+    } else if (Ptr.getOpcode() == ISD::ADD &&
+               isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+               isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+      FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+      Offset = Ptr.getConstantOperandVal(1);
+      Ptr = Ptr.getOperand(0);
+    } else {
+      return SDValue();
+    }
+
+    SDValue Chain = LD->getChain();
+    // Make sure the stack object alignment is at least 16.
+    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+    if (DAG.InferPtrAlignment(Ptr) < 16) {
+      if (MFI->isFixedObjectIndex(FI)) {
+        // Can't change the alignment. Reference stack + offset explicitly
+        // if stack pointer is at least 16-byte aligned.
+        unsigned StackAlign = Subtarget->getStackAlignment();
+        if (StackAlign < 16)
+          return SDValue();
+        Offset = MFI->getObjectOffset(FI) + Offset;
+        SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
+                                              getPointerTy());
+        Ptr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+                          DAG.getConstant(Offset & ~15, getPointerTy()));
+        Offset %= 16;
+      } else {
+        MFI->setObjectAlignment(FI, 16);
+      }
+    }
+
+    // (Offset % 16) must be multiple of 4. Then address is then
+    // Ptr + (Offset & ~15).
+    if (Offset < 0)
+      return SDValue();
+    if ((Offset % 16) & 3)
+      return SDValue();
+    int64_t StartOffset = Offset & ~15;
+    if (StartOffset)
+      Ptr = DAG.getNode(ISD::ADD, Ptr.getDebugLoc(), Ptr.getValueType(),
+                        Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
+
+    int EltNo = (Offset - StartOffset) >> 2;
+    int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
+    EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
+    SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0);
+    // Canonicalize it to a v4i32 shuffle.
+    V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1);
+    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+                       DAG.getVectorShuffle(MVT::v4i32, dl, V1,
+                                            DAG.getUNDEF(MVT::v4i32), &Mask[0]));
+  }
+
+  return SDValue();
+}
+
+SDValue
 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
   DebugLoc dl = Op.getDebugLoc();
   // All zero's are handled with pxor, all one's are handled with pcmpeqd.
@@ -3486,8 +3572,19 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
   }
 
   // Splat is obviously ok. Let legalizer expand it to a shuffle.
-  if (Values.size() == 1)
+  if (Values.size() == 1) {
+    if (EVTBits == 32) {
+      // Instead of a shuffle like this:
+      // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
+      // Check if it's possible to issue this instead.
+      // shuffle (vload ptr)), undef, <1, 1, 1, 1>
+      unsigned Idx = CountTrailingZeros_32(NonZeros);
+      SDValue Item = Op.getOperand(Idx);
+      if (Op.getNode()->isOnlyUserOf(Item.getNode()))
+        return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
+    }
     return SDValue();
+  }
 
   // A vector full of immediates; various special cases are already
   // handled, so this is best done with a single constant-pool load.
@@ -4278,7 +4375,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
   unsigned ShAmt = 0;
   SDValue ShVal;
   bool isShift = getSubtarget()->hasSSE2() &&
-    isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+                 isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
   if (isShift && ShVal.hasOneUse()) {
     // If the shifted value has multiple uses, it may be cheaper to use
     // v_set0 + movlhps or movhlps, etc.
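
LowerAsSplatVectorLoad above turns a splatted 4-byte scalar load into one 16-byte-aligned vector load plus a shuffle. The offset arithmetic is the subtle part; here is a standalone C++ sketch of just that arithmetic (illustrative only, with an invented function name):

    #include <cassert>
    #include <cstdint>

    // Split base+Offset into a 16-byte-aligned StartOffset plus a lane
    // index, exactly as the code above computes them.
    bool splitSplatLoadOffset(int64_t Offset, int64_t &StartOffset, int &EltNo) {
      if (Offset < 0)
        return false;               // negative offsets are rejected above
      if ((Offset % 16) & 3)
        return false;               // scalar must sit on a 4-byte lane boundary
      StartOffset = Offset & ~15;   // round down to a 16-byte boundary
      EltNo = int((Offset - StartOffset) >> 2); // which of the four 32-bit lanes
      assert(EltNo >= 0 && EltNo < 4);
      return true;
    }

    // E.g. Offset = 20 yields StartOffset = 16 and EltNo = 1, hence the
    // splat shuffle mask <1, 1, 1, 1> built above.
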
@@ -4815,6 +4912,7 @@ static SDValue
 GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
            SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
            unsigned char OperandFlags) {
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
   DebugLoc dl = GA->getDebugLoc();
   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
@@ -4828,6 +4926,10 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
     SDValue Ops[]  = { Chain, TGA };
     Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2);
   }
+
+  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
+  MFI->setHasCalls(true);
+
   SDValue Flag = Chain.getValue(1);
   return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
 }
@@ -5648,6 +5750,17 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
       return SDValue();
 
   SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG);
+
+  // Use sbb x, x to materialize carry bit into a GPR.
+  // FIXME: Temporarily disabled since it breaks self-hosting. It's apparently
+  // miscompiling ARMISelDAGToDAG.cpp.
+  if (0 && !isFP && X86CC == X86::COND_B) {
+    return DAG.getNode(ISD::AND, dl, MVT::i8,
+                       DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8,
+                                   DAG.getConstant(X86CC, MVT::i8), Cond),
+                       DAG.getConstant(1, MVT::i8));
+  }
+
   return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
                      DAG.getConstant(X86CC, MVT::i8), Cond);
 }
@@ -5800,9 +5913,18 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
     Cond = NewCond;
   }
 
+  // Look pass (and (setcc_carry (cmp ...)), 1).
+  if (Cond.getOpcode() == ISD::AND &&
+      Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+    if (C && C->getAPIntValue() == 1)
+      Cond = Cond.getOperand(0);
+  }
+
   // If condition flag is set by a X86ISD::CMP, then use it as the condition
   // setting operand in place of the X86ISD::SETCC.
-  if (Cond.getOpcode() == X86ISD::SETCC) {
+  if (Cond.getOpcode() == X86ISD::SETCC ||
+      Cond.getOpcode() == X86ISD::SETCC_CARRY) {
     CC = Cond.getOperand(0);
 
     SDValue Cmp = Cond.getOperand(1);
@@ -5885,9 +6007,18 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
     Cond = LowerXALUO(Cond, DAG);
 #endif
 
+  // Look pass (and (setcc_carry (cmp ...)), 1).
+  if (Cond.getOpcode() == ISD::AND &&
+      Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+    if (C && C->getAPIntValue() == 1)
+      Cond = Cond.getOperand(0);
+  }
+
   // If condition flag is set by a X86ISD::CMP, then use it as the condition
   // setting operand in place of the X86ISD::SETCC.
-  if (Cond.getOpcode() == X86ISD::SETCC) {
+  if (Cond.getOpcode() == X86ISD::SETCC ||
+      Cond.getOpcode() == X86ISD::SETCC_CARRY) {
     CC = Cond.getOperand(0);
 
     SDValue Cmp = Cond.getOperand(1);
@@ -7274,6 +7405,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::COMI:               return "X86ISD::COMI";
   case X86ISD::UCOMI:              return "X86ISD::UCOMI";
   case X86ISD::SETCC:              return "X86ISD::SETCC";
+  case X86ISD::SETCC_CARRY:        return "X86ISD::SETCC_CARRY";
   case X86ISD::CMOV:               return "X86ISD::CMOV";
   case X86ISD::BRCOND:             return "X86ISD::BRCOND";
   case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
@@ -8327,16 +8459,6 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
   return TargetLowering::isGAPlusOffset(N, GA, Offset);
 }
 
-static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
-                               const TargetLowering &TLI) {
-  GlobalValue *GV;
-  int64_t Offset = 0;
-  if (TLI.isGAPlusOffset(Base, GV, Offset))
-    return (GV->getAlignment() >= N && (Offset % N) == 0);
-  // DAG combine handles the stack object case.
-  return false;
-}
-
 static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
                                      EVT EltVT, LoadSDNode *&LDBase,
                                      unsigned &LastLoadedElt,
@@ -8366,7 +8488,7 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
       continue;
 
     LoadSDNode *LD = cast<LoadSDNode>(Elt);
-    if (!TLI.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i, MFI))
+    if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
      return false;
     LastLoadedElt = i;
   }
@@ -8399,7 +8521,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   if (LastLoadedElt == NumElems - 1) {
-    if (isBaseAlignmentOfN(16, LD->getBasePtr().getNode(), TLI))
+    if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16)
       return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
                          LD->getSrcValue(), LD->getSrcValueOffset(),
                          LD->isVolatile());
@@ -8858,11 +8980,42 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N0.getValueType();
+
+  // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
+  // since the result of setcc_c is all zero's or all ones.
+  if (N1C && N0.getOpcode() == ISD::AND &&
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    SDValue N00 = N0.getOperand(0);
+    if (N00.getOpcode() == X86ISD::SETCC_CARRY ||
+        ((N00.getOpcode() == ISD::ANY_EXTEND ||
+          N00.getOpcode() == ISD::ZERO_EXTEND) &&
+         N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) {
+      APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+      APInt ShAmt = N1C->getAPIntValue();
+      Mask = Mask.shl(ShAmt);
+      if (Mask != 0)
+        return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                           N00, DAG.getConstant(Mask, VT));
+    }
+  }
+
+  return SDValue();
+}
 /// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
 /// when possible.
 static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
                                    const X86Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
+  if (!VT.isVector() && VT.isInteger() &&
+      N->getOpcode() == ISD::SHL)
+    return PerformSHLCombine(N, DAG);
+
   // On X86 with SSE2 support, we can transform this to a vector shift if
   // all elements are shifted by the same amount.  We can't do this in legalize
   // because the a constant vector is typically transformed to a constant pool
@@ -8870,7 +9023,6 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
   if (!Subtarget->hasSSE2())
     return SDValue();
 
-  EVT VT = N->getValueType(0);
   if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
     return SDValue();
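
PerformSHLCombine above relies on setcc_carry producing either all zeros or all ones, which makes the shift commute with the masking AND. A self-contained C++ check of that identity over the only two values such a node can take (illustration only, not part of the commit):

    #include <cassert>
    #include <cstdint>

    // For x in {0, ~0}:  (x & c1) << c2  ==  x & (c1 << c2).
    // This is exactly the rewrite the combine performs; it would be
    // wrong for arbitrary x.
    int main() {
      const uint32_t Vals[2] = {0u, ~0u};
      for (int v = 0; v < 2; ++v)
        for (uint32_t c1 = 0; c1 < 256; ++c1)
          for (uint32_t c2 = 0; c2 < 8; ++c2)
            assert(((Vals[v] & c1) << c2) == (Vals[v] & (c1 << c2)));
      return 0;
    }
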
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 7b4ab62..64bc70c 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -118,6 +118,10 @@ namespace llvm {
       /// operand produced by a CMP instruction.
       SETCC,
 
+      // Same as SETCC except it's materialized with a sbb and the value is all
+      // one's or all zero's.
+      SETCC_CARRY,
+
       /// X86 conditional moves. Operand 0 and operand 1 are the two values
       /// to select from. Operand 2 is the condition code, and operand 3 is the
       /// flag operand produced by a CMP or TEST instruction. It also writes a
@@ -626,7 +630,9 @@ namespace llvm {
 
     std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
                                                bool isSigned);
-
+
+    SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+                                   SelectionDAG &DAG);
     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
     SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG);
     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG);
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index b5fa862..b6a2c05 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1333,6 +1333,15 @@ def CMOVNO64rm : RI<0x41, MRMSrcMem,       // if !overflow, GR64 = [mem64]
                    X86_COND_NO, EFLAGS))]>, TB;
 } // isTwoAddress
 
+// Use sbb to materialize carry flag into a GPR.
+let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in
+def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins),
+                   "sbb{q}\t$dst, $dst",
+                   [(set GR64:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>;
+
+def : Pat<(i64 (anyext (X86setcc_c X86_COND_B, EFLAGS))),
+          (SETB_C64r)>;
+
 //===----------------------------------------------------------------------===//
 //  Conversion Instructions...
 //
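
The SETB_C64r definition above (and its 8/16/32-bit siblings later in this patch) encodes the classic "sbb x, x" idiom. A portable C++ model of what the instruction computes after a compare sets the carry flag (illustration only; there is no real flag access here):

    #include <cassert>
    #include <cstdint>

    // After "cmp a, b", CF = (a < b) unsigned. "sbb x, x" then computes
    // x - x - CF: all zeros when CF is clear, all ones when it is set.
    uint64_t materializeCarry(uint64_t a, uint64_t b) {
      uint64_t CF = a < b ? 1 : 0;
      return 0 - CF;                 // 0x0 or 0xFFFFFFFFFFFFFFFF
    }

    int main() {
      assert(materializeCarry(1, 2) == ~0ULL); // borrow taken
      assert(materializeCarry(2, 1) == 0);     // no borrow
      return 0;
    }
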
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index a37013d..1947d35 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -783,12 +783,14 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
     if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
       return Reg;
     // Check for post-frame index elimination operations
-    return hasLoadFromStackSlot(MI, FrameIndex);
+    const MachineMemOperand *Dummy;
+    return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
   }
   return 0;
 }
 
 bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+                                        const MachineMemOperand *&MMO,
                                         int &FrameIndex) const {
   for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
          oe = MI->memoperands_end();
@@ -798,6 +800,7 @@ bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
       if (const FixedStackPseudoSourceValue *Value =
           dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
         FrameIndex = Value->getFrameIndex();
+        MMO = *o;
         return true;
       }
   }
@@ -819,12 +822,14 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
     if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
       return Reg;
     // Check for post-frame index elimination operations
-    return hasStoreToStackSlot(MI, FrameIndex);
+    const MachineMemOperand *Dummy;
+    return hasStoreToStackSlot(MI, Dummy, FrameIndex);
   }
   return 0;
 }
 
 bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+                                       const MachineMemOperand *&MMO,
                                        int &FrameIndex) const {
   for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
          oe = MI->memoperands_end();
@@ -834,6 +839,7 @@ bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
       if (const FixedStackPseudoSourceValue *Value =
           dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
         FrameIndex = Value->getFrameIndex();
+        MMO = *o;
         return true;
       }
   }
@@ -1052,6 +1058,112 @@ static bool hasLiveCondCodeDef(MachineInstr *MI) {
   return false;
 }
 
+/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when
+/// 16-bit LEA is disabled, use 32-bit LEA to form 3-address code by promoting
+/// to a 32-bit superregister and then truncating back down to a 16-bit
+/// subregister.
+MachineInstr *
+X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
+                                           MachineFunction::iterator &MFI,
+                                           MachineBasicBlock::iterator &MBBI,
+                                           LiveVariables *LV) const {
+  MachineInstr *MI = MBBI;
+  unsigned Dest = MI->getOperand(0).getReg();
+  unsigned Src = MI->getOperand(1).getReg();
+  bool isDead = MI->getOperand(0).isDead();
+  bool isKill = MI->getOperand(1).isKill();
+
+  unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
+    ? X86::LEA64_32r : X86::LEA32r;
+  MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
+  unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+  unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+
+  // Build and insert into an implicit UNDEF value. This is OK because
+  // well be shifting and then extracting the lower 16-bits.
+  // This has the potential to cause partial register stall. e.g.
+  //   movw    (%rbp,%rcx,2), %dx
+  //   leal    -65(%rdx), %esi
+  // But testing has shown this *does* help performance in 64-bit mode (at
+  // least on modern x86 machines).
+  BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
+  MachineInstr *InsMI =
+    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
+    .addReg(leaInReg)
+    .addReg(Src, getKillRegState(isKill))
+    .addImm(X86::SUBREG_16BIT);
+
+  MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
+                                    get(Opc), leaOutReg);
+  switch (MIOpc) {
+  default:
+    llvm_unreachable(0);
+    break;
+  case X86::SHL16ri: {
+    unsigned ShAmt = MI->getOperand(2).getImm();
+    MIB.addReg(0).addImm(1 << ShAmt)
+       .addReg(leaInReg, RegState::Kill).addImm(0);
+    break;
+  }
+  case X86::INC16r:
+  case X86::INC64_16r:
+    addLeaRegOffset(MIB, leaInReg, true, 1);
+    break;
+  case X86::DEC16r:
+  case X86::DEC64_16r:
+    addLeaRegOffset(MIB, leaInReg, true, -1);
+    break;
+  case X86::ADD16ri:
+  case X86::ADD16ri8:
+    addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
+    break;
+  case X86::ADD16rr: {
+    unsigned Src2 = MI->getOperand(2).getReg();
+    bool isKill2 = MI->getOperand(2).isKill();
+    unsigned leaInReg2 = 0;
+    MachineInstr *InsMI2 = 0;
+    if (Src == Src2) {
+      // ADD16rr %reg1028<kill>, %reg1028
+      // just a single insert_subreg.
+      addRegReg(MIB, leaInReg, true, leaInReg, false);
+    } else {
+      leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+      // Build and insert into an implicit UNDEF value. This is OK because
+      // well be shifting and then extracting the lower 16-bits.
+      BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+      InsMI2 =
+        BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2)
+        .addReg(leaInReg2)
+        .addReg(Src2, getKillRegState(isKill2))
+        .addImm(X86::SUBREG_16BIT);
+      addRegReg(MIB, leaInReg, true, leaInReg2, true);
+    }
+    if (LV && isKill2 && InsMI2)
+      LV->replaceKillInstruction(Src2, MI, InsMI2);
+    break;
+  }
+  }
+
+  MachineInstr *NewMI = MIB;
+  MachineInstr *ExtMI =
+    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
+    .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+    .addReg(leaOutReg, RegState::Kill)
+    .addImm(X86::SUBREG_16BIT);
+
+  if (LV) {
+    // Update live variables
+    LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
+    LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
+    if (isKill)
+      LV->replaceKillInstruction(Src, MI, InsMI);
+    if (isDead)
+      LV->replaceKillInstruction(Dest, MI, ExtMI);
+  }
+
+  return ExtMI;
+}
+
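
convertToThreeAddressWithLEA above performs a 16-bit operation in a 32-bit register (insert_subreg, 32-bit LEA, extract_subreg) because 16-bit LEA is disabled. The detour is safe because only the low 16 bits of the widened result are kept. A minimal C++ model of one case, ADD16ri (illustrative; the function name is invented):

    #include <cassert>
    #include <cstdint>

    // Widen to 32 bits, add there (the "leal imm(%reg)" step), then
    // truncate back, as the insert_subreg/extract_subreg pair does.
    // The high bits of LeaOut are don't-care, so truncation is exact.
    uint16_t add16_via_lea32(uint16_t Src, int32_t Imm) {
      uint32_t LeaIn  = Src;                    // insert_subreg into a GR32
      uint32_t LeaOut = LeaIn + uint32_t(Imm);  // 32-bit LEA
      return uint16_t(LeaOut);                  // extract_subreg, low 16 bits
    }

    int main() {
      assert(add16_via_lea32(0xFFFF, 1) == 0);  // wraps exactly like add16ri
      assert(add16_via_lea32(100, -65) == 35);
      return 0;
    }
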
 /// convertToThreeAddress - This method must be implemented by targets that
 /// set the M_CONVERTIBLE_TO_3_ADDR flag.  When this flag is set, the target
 /// may be able to convert a two-address instruction into a true
@@ -1077,7 +1189,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   MachineInstr *NewMI = NULL;
   // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's.  When
   // we have better subtarget support, enable the 16-bit LEA generation here.
+  // 16-bit LEA is also slow on Core2.
   bool DisableLEA16 = true;
+  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
 
   unsigned MIOpc = MI->getOpcode();
   switch (MIOpc) {
@@ -1116,8 +1230,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     unsigned ShAmt = MI->getOperand(2).getImm();
     if (ShAmt == 0 || ShAmt >= 4) return 0;
 
-    unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() ?
-      X86::LEA64_32r : X86::LEA32r;
+    unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
     NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
       .addReg(Dest, RegState::Define | getDeadRegState(isDead))
       .addReg(0).addImm(1 << ShAmt)
@@ -1131,51 +1244,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     unsigned ShAmt = MI->getOperand(2).getImm();
     if (ShAmt == 0 || ShAmt >= 4) return 0;
 
-    if (DisableLEA16) {
-      // If 16-bit LEA is disabled, use 32-bit LEA via subregisters.
-      MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
-      unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
-        ? X86::LEA64_32r : X86::LEA32r;
-      unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
-      unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
-
-      // Build and insert into an implicit UNDEF value. This is OK because
-      // well be shifting and then extracting the lower 16-bits.
-      BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
-      MachineInstr *InsMI =
-        BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
-        .addReg(leaInReg)
-        .addReg(Src, getKillRegState(isKill))
-        .addImm(X86::SUBREG_16BIT);
-
-      NewMI = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg)
-        .addReg(0).addImm(1 << ShAmt)
-        .addReg(leaInReg, RegState::Kill)
-        .addImm(0);
-
-      MachineInstr *ExtMI =
-        BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
-        .addReg(Dest, RegState::Define | getDeadRegState(isDead))
-        .addReg(leaOutReg, RegState::Kill)
-        .addImm(X86::SUBREG_16BIT);
-
-      if (LV) {
-        // Update live variables
-        LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
-        LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
-        if (isKill)
-          LV->replaceKillInstruction(Src, MI, InsMI);
-        if (isDead)
-          LV->replaceKillInstruction(Dest, MI, ExtMI);
-      }
-      return ExtMI;
-    } else {
-      NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
-        .addReg(Dest, RegState::Define | getDeadRegState(isDead))
-        .addReg(0).addImm(1 << ShAmt)
-        .addReg(Src, getKillRegState(isKill))
-        .addImm(0);
-    }
+    if (DisableLEA16)
+      return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+    NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+      .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+      .addReg(0).addImm(1 << ShAmt)
+      .addReg(Src, getKillRegState(isKill))
+      .addImm(0);
     break;
   }
   default: {
@@ -1185,7 +1260,6 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     if (hasLiveCondCodeDef(MI))
       return 0;
 
-    bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
     switch (MIOpc) {
     default: return 0;
     case X86::INC64r:
@@ -1202,7 +1276,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     }
     case X86::INC16r:
    case X86::INC64_16r:
-      if (DisableLEA16) return 0;
+      if (DisableLEA16)
+        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
       assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
       NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                            .addReg(Dest, RegState::Define |
@@ -1223,7 +1298,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     }
     case X86::DEC16r:
     case X86::DEC64_16r:
-      if (DisableLEA16) return 0;
+      if (DisableLEA16)
+        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
       assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
       NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                            .addReg(Dest, RegState::Define |
@@ -1246,7 +1322,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
       break;
     }
     case X86::ADD16rr: {
-      if (DisableLEA16) return 0;
+      if (DisableLEA16)
+        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
       assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
       unsigned Src2 = MI->getOperand(2).getReg();
       bool isKill2 = MI->getOperand(2).isKill();
@@ -1261,56 +1338,32 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     case X86::ADD64ri32:
     case X86::ADD64ri8:
       assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
-      if (MI->getOperand(2).isImm())
-        NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
-                                .addReg(Dest, RegState::Define |
-                                        getDeadRegState(isDead)),
-                                Src, isKill, MI->getOperand(2).getImm());
+      NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+                              .addReg(Dest, RegState::Define |
+                                      getDeadRegState(isDead)),
+                              Src, isKill, MI->getOperand(2).getImm());
       break;
     case X86::ADD32ri:
-    case X86::ADD32ri8:
+    case X86::ADD32ri8: {
       assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
-      if (MI->getOperand(2).isImm()) {
-        unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
-        NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
-                                .addReg(Dest, RegState::Define |
-                                        getDeadRegState(isDead)),
+      unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+      NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+                              .addReg(Dest, RegState::Define |
                                       getDeadRegState(isDead)),
                                 Src, isKill, MI->getOperand(2).getImm());
-      }
       break;
+    }
     case X86::ADD16ri:
     case X86::ADD16ri8:
-      if (DisableLEA16) return 0;
+      if (DisableLEA16)
+        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
       assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
-      if (MI->getOperand(2).isImm())
-        NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
-                             .addReg(Dest, RegState::Define |
-                                     getDeadRegState(isDead)),
-                             Src, isKill, MI->getOperand(2).getImm());
-      break;
-    case X86::SHL16ri:
-      if (DisableLEA16) return 0;
-    case X86::SHL32ri:
-    case X86::SHL64ri: {
-      assert(MI->getNumOperands() >= 3 && MI->getOperand(2).isImm() &&
-             "Unknown shl instruction!");
-      unsigned ShAmt = MI->getOperand(2).getImm();
-      if (ShAmt == 1 || ShAmt == 2 || ShAmt == 3) {
-        X86AddressMode AM;
-        AM.Scale = 1 << ShAmt;
-        AM.IndexReg = Src;
-        unsigned Opc = MIOpc == X86::SHL64ri ? X86::LEA64r
-          : (MIOpc == X86::SHL32ri
-             ? (is64Bit ? X86::LEA64_32r : X86::LEA32r) : X86::LEA16r);
-        NewMI = addFullAddress(BuildMI(MF, MI->getDebugLoc(), get(Opc))
-                               .addReg(Dest, RegState::Define |
-                                       getDeadRegState(isDead)), AM);
-        if (isKill)
-          NewMI->getOperand(3).setIsKill(true);
-      }
+      NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+                              .addReg(Dest, RegState::Define |
+                                      getDeadRegState(isDead)),
+                              Src, isKill, MI->getOperand(2).getImm());
       break;
     }
-    }
   }
   }
@@ -1571,14 +1624,17 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
   MachineBasicBlock::iterator I = MBB.end();
   while (I != MBB.begin()) {
     --I;
-    // Working from the bottom, when we see a non-terminator
-    // instruction, we're done.
+
+    // Working from the bottom, when we see a non-terminator instruction, we're
+    // done.
     if (!isBrAnalysisUnpredicatedTerminator(I, *this))
       break;
-    // A terminator that isn't a branch can't easily be handled
-    // by this analysis.
+
+    // A terminator that isn't a branch can't easily be handled by this
+    // analysis.
     if (!I->getDesc().isBranch())
       return true;
+
     // Handle unconditional branches.
     if (I->getOpcode() == X86::JMP) {
       if (!AllowModify) {
@@ -1587,10 +1643,12 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
       }
 
       // If the block has any instructions after a JMP, delete them.
-      while (next(I) != MBB.end())
-        next(I)->eraseFromParent();
+      while (llvm::next(I) != MBB.end())
+        llvm::next(I)->eraseFromParent();
+
       Cond.clear();
       FBB = 0;
+
       // Delete the JMP if it's equivalent to a fall-through.
       if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
         TBB = 0;
@@ -1598,14 +1656,17 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
         I = MBB.end();
         continue;
       }
+
       // TBB is used to indicate the unconditinal destination.
       TBB = I->getOperand(0).getMBB();
       continue;
     }
+
     // Handle conditional branches.
     X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
     if (BranchCode == X86::COND_INVALID)
       return true;  // Can't handle indirect branch.
+
     // Working from the bottom, handle the first conditional branch.
     if (Cond.empty()) {
       FBB = TBB;
@@ -1613,24 +1674,26 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
       Cond.push_back(MachineOperand::CreateImm(BranchCode));
       continue;
     }
-    // Handle subsequent conditional branches. Only handle the case
-    // where all conditional branches branch to the same destination
-    // and their condition opcodes fit one of the special
-    // multi-branch idioms.
+
+    // Handle subsequent conditional branches. Only handle the case where all
+    // conditional branches branch to the same destination and their condition
+    // opcodes fit one of the special multi-branch idioms.
     assert(Cond.size() == 1);
     assert(TBB);
-    // Only handle the case where all conditional branches branch to
-    // the same destination.
+
+    // Only handle the case where all conditional branches branch to the same
+    // destination.
     if (TBB != I->getOperand(0).getMBB())
       return true;
-    X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
+
     // If the conditions are the same, we can leave them alone.
+    X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
     if (OldBranchCode == BranchCode)
       continue;
-    // If they differ, see if they fit one of the known patterns.
-    // Theoretically we could handle more patterns here, but
-    // we shouldn't expect to see them if instruction selection
-    // has done a reasonable job.
+
+    // If they differ, see if they fit one of the known patterns. Theoretically,
+    // we could handle more patterns here, but we shouldn't expect to see them
+    // if instruction selection has done a reasonable job.
     if ((OldBranchCode == X86::COND_NP &&
          BranchCode == X86::COND_E) ||
         (OldBranchCode == X86::COND_E &&
@@ -1643,6 +1706,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
       BranchCode = X86::COND_NE_OR_P;
     else
       return true;
+
     // Update the MachineOperand.
     Cond[0].setImm(BranchCode);
   }
@@ -2713,27 +2777,6 @@ unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
   return I->second.first;
 }
 
-bool X86InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
-  if (MBB.empty()) return false;
-
-  switch (MBB.back().getOpcode()) {
-  case X86::TCRETURNri:
-  case X86::TCRETURNdi:
-  case X86::RET:     // Return.
-  case X86::RETI:
-  case X86::TAILJMPd:
-  case X86::TAILJMPr:
-  case X86::TAILJMPm:
-  case X86::JMP:     // Uncond branch.
-  case X86::JMP32r:  // Indirect branch.
-  case X86::JMP64r:  // Indirect branch (64-bit).
-  case X86::JMP32m:  // Indirect branch through mem.
-  case X86::JMP64m:  // Indirect branch through mem (64-bit).
-    return true;
-  default: return false;
-  }
-}
-
 bool X86InstrInfo::
 ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   assert(Cond.size() == 1 && "Invalid X86 branch condition!");
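
The AnalyzeBranch changes above recognize pairs of conditional branches to one target that together form a single logical condition (for example, the JNE/JP pair emitted for a floating-point inequality). A compact sketch of that merging step, in illustrative C++; the enum values here are stand-ins, not the real X86::CondCode layout, and only one of the recognized idioms is shown:

    // Merge two condition codes aimed at the same block into one of the
    // special multi-branch pseudo conditions, or report failure.
    enum CondCode { COND_E, COND_NE, COND_P, COND_NP,
                    COND_NE_OR_P, COND_INVALID };

    CondCode mergeBranchConds(CondCode Old, CondCode New) {
      // jne + jp (either order) => "not equal OR unordered", the x86
      // branch pattern for an FP inequality.
      if ((Old == COND_NE && New == COND_P) ||
          (Old == COND_P && New == COND_NE))
        return COND_NE_OR_P;
      return COND_INVALID;        // the real code knows further idioms
    }
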
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index c6daa25..b83441d 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -457,11 +457,14 @@ public:
 
   /// hasLoadFromStackSlot - If the specified machine instruction has
   /// a load from a stack slot, return true along with the FrameIndex
-  /// of the loaded stack slot. If not, return false. Unlike
+  /// of the loaded stack slot and the machine mem operand containing
+  /// the reference. If not, return false. Unlike
   /// isLoadFromStackSlot, this returns true for any instructions that
   /// loads from the stack. This is a hint only and may not catch all
   /// cases.
-  bool hasLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+  bool hasLoadFromStackSlot(const MachineInstr *MI,
+                            const MachineMemOperand *&MMO,
+                            int &FrameIndex) const;
 
   unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
 
   /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
@@ -472,11 +475,13 @@ public:
 
   /// hasStoreToStackSlot - If the specified machine instruction has a
   /// store to a stack slot, return true along with the FrameIndex of
-  /// the loaded stack slot. If not, return false. Unlike
-  /// isStoreToStackSlot, this returns true for any instructions that
-  /// loads from the stack. This is a hint only and may not catch all
-  /// cases.
-  bool hasStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+  /// the loaded stack slot and the machine mem operand containing the
+  /// reference. If not, return false. Unlike isStoreToStackSlot,
+  /// this returns true for any instructions that loads from the
+  /// stack. This is a hint only and may not catch all cases.
+  bool hasStoreToStackSlot(const MachineInstr *MI,
+                           const MachineMemOperand *&MMO,
+                           int &FrameIndex) const;
 
   bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
                                          AliasAnalysis *AA) const;
@@ -595,7 +600,6 @@ public:
                                       bool UnfoldLoad, bool UnfoldStore,
                                       unsigned *LoadRegIndex = 0) const;
 
-  virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
   virtual
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
 
@@ -633,6 +637,11 @@ public:
   unsigned getGlobalBaseReg(MachineFunction *MF) const;
 
 private:
+  MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
+                                              MachineFunction::iterator &MFI,
+                                              MachineBasicBlock::iterator &MBBI,
+                                              LiveVariables *LV) const;
+
   MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
                                       MachineInstr* MI,
                                       unsigned OpNum,
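
Call sites of the widened hasLoadFromStackSlot/hasStoreToStackSlot signatures above now receive the MachineMemOperand as well; the patch's own isLoadFromStackSlotPostFE shows the pattern of passing a dummy when the MMO is not needed. A fragment of the intended usage (sketch only; it assumes surrounding LLVM code of this vintage and is not self-contained):

    const MachineMemOperand *MMO = 0;
    int FrameIndex = 0;
    if (TII->hasLoadFromStackSlot(MI, MMO, FrameIndex)) {
      // MMO describes the access (size, alignment, PseudoSourceValue);
      // FrameIndex identifies the stack slot being reloaded.
    }
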
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 90ef1f4..3cc1853 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -87,6 +87,7 @@ def X86cmov    : SDNode<"X86ISD::CMOV",     SDTX86Cmov>;
 def X86brcond  : SDNode<"X86ISD::BRCOND",   SDTX86BrCond,
                         [SDNPHasChain]>;
 def X86setcc   : SDNode<"X86ISD::SETCC",    SDTX86SetCC>;
+def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC>;
 
 def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
                     [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
@@ -816,7 +817,7 @@ def BSR32rm  : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
 
 let neverHasSideEffects = 1 in
 def LEA16r   : I<0x8D, MRMSrcMem,
-                 (outs GR16:$dst), (ins i32mem:$src),
+                 (outs GR16:$dst), (ins lea32mem:$src),
                 "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
 let isReMaterializable = 1 in
 def LEA32r   : I<0x8D, MRMSrcMem,
@@ -3059,6 +3060,21 @@ let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
 def LAHF     : I<0x9F, RawFrm, (outs),  (ins), "lahf", []>;  // AH = flags
 
 let Uses = [EFLAGS] in {
+// Use sbb to materialize carry bit.
+
+let Defs = [EFLAGS], isCodeGenOnly = 1 in {
+def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins),
+                 "sbb{b}\t$dst, $dst",
+                 [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins),
+                  "sbb{w}\t$dst, $dst",
+                  [(set GR16:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>,
+                OpSize;
+def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins),
+                  "sbb{l}\t$dst, $dst",
+                  [(set GR32:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>;
+} // isCodeGenOnly
+
 def SETEr    : I<0x94, MRM0r,
                  (outs GR8 :$dst), (ins),
                  "sete\t$dst",
@@ -4169,6 +4185,12 @@ def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1),
                        GR16:$src2, (i8 imm:$amt2)), addr:$dst),
           (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
 
+// (anyext (setcc_carry)) -> (zext (setcc_carry))
+def : Pat<(i16 (anyext (X86setcc_c X86_COND_B, EFLAGS))),
+          (SETB_C16r)>;
+def : Pat<(i32 (anyext (X86setcc_c X86_COND_B, EFLAGS))),
+          (SETB_C32r)>;
+
 //===----------------------------------------------------------------------===//
 // EFLAGS-defining Patterns
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index dfdd4ce..62841f8 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2083,7 +2083,7 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
                      (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
                      "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set VR128:$dst, (v4i32 (pshufd:$src2
-                                               (bc_v4i32(memopv2i64 addr:$src1)),
+                                               (bc_v4i32 (memopv2i64 addr:$src1)),
                                               (undef))))]>;
 }
 
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 33852bd..d96aafd 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -423,21 +423,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 // Stack Frame Processing methods
 //===----------------------------------------------------------------------===//
 
-static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
-  unsigned MaxAlign = 0;
-
-  for (int i = FFI->getObjectIndexBegin(),
-         e = FFI->getObjectIndexEnd(); i != e; ++i) {
-    if (FFI->isDeadObjectIndex(i))
-      continue;
-
-    unsigned Align = FFI->getObjectAlignment(i);
-    MaxAlign = std::max(MaxAlign, Align);
-  }
-
-  return MaxAlign;
-}
-
 /// hasFP - Return true if the specified function should have a dedicated frame
 /// pointer register.  This is true if the function has variable sized allocas
 /// or if frame pointer elimination is disabled.
@@ -638,10 +623,7 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
 
   // Calculate and set max stack object alignment early, so we can decide
   // whether we will need stack realignment (and thus FP).
-  unsigned MaxAlign = std::max(MFI->getMaxAlignment(),
-                               calculateMaxStackAlignment(MFI));
-
-  MFI->setMaxAlignment(MaxAlign);
+  MFI->calculateMaxStackAlignment();
 
   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
   int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
@@ -741,7 +723,7 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB,
   if (MBBI == MBB.end()) return;
 
-  MachineBasicBlock::iterator NI = next(MBBI);
+  MachineBasicBlock::iterator NI = llvm::next(MBBI);
   if (NI == MBB.end()) return;
 
   unsigned Opc = NI->getOpcode();
@@ -775,7 +757,7 @@ static int mergeSPUpdates(MachineBasicBlock &MBB,
     return 0;
 
   MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
-  MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : next(MBBI);
+  MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
   unsigned Opc = PI->getOpcode();
   int Offset = 0;
 
@@ -1001,7 +983,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
   }
 
   // Mark the FramePtr as live-in in every block except the entry.
-  for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+  for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
        I != E; ++I)
     I->addLiveIn(FramePtr);
 
@@ -1482,45 +1464,3 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
 }
 
 #include "X86GenRegisterInfo.inc"
-
-namespace {
-  struct MSAC : public MachineFunctionPass {
-    static char ID;
-    MSAC() : MachineFunctionPass(&ID) {}
-
-    virtual bool runOnMachineFunction(MachineFunction &MF) {
-      MachineFrameInfo *FFI = MF.getFrameInfo();
-      MachineRegisterInfo &RI = MF.getRegInfo();
-
-      // Calculate max stack alignment of all already allocated stack objects.
-      unsigned MaxAlign = calculateMaxStackAlignment(FFI);
-
-      // Be over-conservative: scan over all vreg defs and find, whether vector
-      // registers are used. If yes - there is probability, that vector register
-      // will be spilled and thus stack needs to be aligned properly.
-      for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
-           RegNum < RI.getLastVirtReg(); ++RegNum)
-        MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
-
-      if (FFI->getMaxAlignment() == MaxAlign)
-        return false;
-
-      FFI->setMaxAlignment(MaxAlign);
-      return true;
-    }
-
-    virtual const char *getPassName() const {
-      return "X86 Maximal Stack Alignment Calculator";
-    }
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.setPreservesCFG();
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
-  };
-
-  char MSAC::ID = 0;
-}
-
-FunctionPass*
-llvm::createX86MaxStackAlignmentCalculatorPass() { return new MSAC(); }
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 661f560..75cdbad 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -367,5 +367,5 @@ bool X86Subtarget::enablePostRAScheduler(
                                           RegClassVector& CriticalPathRCs) const {
   Mode = TargetSubtarget::ANTIDEP_CRITICAL;
   CriticalPathRCs.clear();
-  return OptLevel >= CodeGenOpt::Default;
+  return OptLevel >= CodeGenOpt::Aggressive;
 }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 0cda8bc..0152121 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -163,7 +163,7 @@ bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
                                       CodeGenOpt::Level OptLevel) {
   // Calculate and set max stack object alignment early, so we can decide
   // whether we will need stack realignment (and thus FP).
-  PM.add(createX86MaxStackAlignmentCalculatorPass());
+  PM.add(createMaxStackAlignmentCalculatorPass());
   return false;  // -print-machineinstr shouldn't print after this.
 }
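
With the target-specific MSAC pass deleted above, the maximum-alignment scan moves to generic code (MFI->calculateMaxStackAlignment() and the pass created by createMaxStackAlignmentCalculatorPass(), both visible in the hunks). A standalone C++ model of the computation the deleted helper performed (illustrative only; the function name is invented):

    #include <algorithm>
    #include <vector>

    // Maximum alignment over all live stack objects; dead objects are
    // assumed to be filtered out already, as the removed loop did with
    // isDeadObjectIndex().
    unsigned maxStackObjectAlignment(const std::vector<unsigned> &ObjectAligns) {
      unsigned MaxAlign = 0;
      for (size_t i = 0; i != ObjectAligns.size(); ++i)
        MaxAlign = std::max(MaxAlign, ObjectAligns[i]);
      return MaxAlign;
    }
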