author     rdivacky <rdivacky@FreeBSD.org>  2009-12-15 18:09:07 +0000
committer  rdivacky <rdivacky@FreeBSD.org>  2009-12-15 18:09:07 +0000
commit     40a6fcdb85efd93fe0e36c9552cfb0b18b5eacd6 (patch)
tree       076117cdf3579003f07cad4cdf0593347ce58150 /lib/Target/X86
parent     e7908924d847e63b02bc82bfaa1709ab9c774dcd (diff)
Update LLVM to 91430.
Diffstat (limited to 'lib/Target/X86')
-rw-r--r--  lib/Target/X86/X86.h                        |    5
-rw-r--r--  lib/Target/X86/X86COFFMachineModuleInfo.h   |    1
-rw-r--r--  lib/Target/X86/X86CallingConv.td            |    9
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp         |    2
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp          |  190
-rw-r--r--  lib/Target/X86/X86ISelLowering.h            |    8
-rw-r--r--  lib/Target/X86/X86Instr64bit.td             |    9
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp             |  305
-rw-r--r--  lib/Target/X86/X86InstrInfo.h               |   25
-rw-r--r--  lib/Target/X86/X86InstrInfo.td              |   24
-rw-r--r--  lib/Target/X86/X86InstrSSE.td               |    2
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp          |   68
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp             |    2
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp         |    2
14 files changed, 418 insertions(+), 234 deletions(-)
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index a167118..684c61f 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -62,11 +62,6 @@ MCCodeEmitter *createX86MCCodeEmitter(const Target &, TargetMachine &TM);
///
FunctionPass *createEmitX86CodeToMemory();
-/// createX86MaxStackAlignmentCalculatorPass - This function returns a pass
-/// which calculates maximal stack alignment required for function
-///
-FunctionPass *createX86MaxStackAlignmentCalculatorPass();
-
extern Target TheX86_32Target, TheX86_64Target;
} // End llvm namespace
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index afd5525..5017af2 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/ADT/StringSet.h"
+#include "X86MachineFunctionInfo.h"
namespace llvm {
class X86MachineFunctionInfo;
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index d77f039..12d3d04 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -64,11 +64,18 @@ def RetCC_X86_32_C : CallingConv<[
// X86-32 FastCC return-value convention.
def RetCC_X86_32_Fast : CallingConv<[
// The X86-32 fastcc returns 1, 2, or 3 FP values in XMM0-2 if the target has
- // SSE2, otherwise it is the the C calling conventions.
+ // SSE2.
// This can happen when a float, 2 x float, or 3 x float vector is split by
// target lowering, and is returned in 1-3 sse regs.
CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+
+ // For integers, ECX can be used as an extra return register
+ CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>,
+ CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>,
+ CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>,
+
+ // Otherwise, it is the same as the common X86 calling convention.
CCDelegateTo<RetCC_X86Common>
]>;
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index a2fe9b0..044bd4b 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -289,7 +289,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
while (Start != BB.begin() && prior(Start) != PrevI) --Start;
errs() << "Inserted instructions:\n\t";
Start->print(errs(), &MF.getTarget());
- while (++Start != next(I)) {}
+ while (++Start != llvm::next(I)) {}
}
dumpStack();
);
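Replacing the unqualified next with llvm::next here (and in X86InstrInfo.cpp and X86RegisterInfo.cpp below) sidesteps an ambiguity with C++0x's std::next, which argument-dependent lookup can also find for standard-library iterators. A minimal sketch of the problem, using a simplified stand-in for llvm::next:

#include <iterator>
#include <list>

namespace llvm {
  // Simplified stand-in for the helper in llvm/ADT/STLExtras.h.
  template <typename ItTy> ItTy next(ItTy it) { return ++it; }
}
using namespace llvm;

int demo(std::list<int> &L) {
  std::list<int>::iterator I = L.begin();
  // next(I);             // ambiguous under C++0x: ADL also finds std::next
  return *llvm::next(I);  // explicit qualification resolves the ambiguity
}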
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d80b8ec..0517b56 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -595,6 +595,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::TRUNCATE, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+ setTruncStoreAction((MVT::SimpleValueType)VT,
+ (MVT::SimpleValueType)InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
}
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
@@ -671,8 +683,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom);
- setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand);
- setOperationAction(ISD::TRUNCATE, MVT::v8i8, Expand);
setOperationAction(ISD::SELECT, MVT::v8i8, Promote);
setOperationAction(ISD::SELECT, MVT::v4i16, Promote);
setOperationAction(ISD::SELECT, MVT::v2i32, Promote);
@@ -3344,6 +3354,82 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
}
SDValue
+X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+ SelectionDAG &DAG) {
+
+ // Check if the scalar load can be widened into a vector load. And if
+ // the address is "base + cst", see if the cst can be "absorbed" into
+ // the shuffle mask.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
+ SDValue Ptr = LD->getBasePtr();
+ if (!ISD::isNormalLoad(LD) || LD->isVolatile())
+ return SDValue();
+ EVT PVT = LD->getValueType(0);
+ if (PVT != MVT::i32 && PVT != MVT::f32)
+ return SDValue();
+
+ int FI = -1;
+ int64_t Offset = 0;
+ if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FI = FINode->getIndex();
+ Offset = 0;
+ } else if (Ptr.getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ Offset = Ptr.getConstantOperandVal(1);
+ Ptr = Ptr.getOperand(0);
+ } else {
+ return SDValue();
+ }
+
+ SDValue Chain = LD->getChain();
+ // Make sure the stack object alignment is at least 16.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ if (DAG.InferPtrAlignment(Ptr) < 16) {
+ if (MFI->isFixedObjectIndex(FI)) {
+ // Can't change the alignment. Reference stack + offset explicitly
+ // if stack pointer is at least 16-byte aligned.
+ unsigned StackAlign = Subtarget->getStackAlignment();
+ if (StackAlign < 16)
+ return SDValue();
+ Offset = MFI->getObjectOffset(FI) + Offset;
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
+ getPointerTy());
+ Ptr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+ DAG.getConstant(Offset & ~15, getPointerTy()));
+ Offset %= 16;
+ } else {
+ MFI->setObjectAlignment(FI, 16);
+ }
+ }
+
+ // (Offset % 16) must be a multiple of 4. The address is then
+ // Ptr + (Offset & ~15).
+ if (Offset < 0)
+ return SDValue();
+ if ((Offset % 16) & 3)
+ return SDValue();
+ int64_t StartOffset = Offset & ~15;
+ if (StartOffset)
+ Ptr = DAG.getNode(ISD::ADD, Ptr.getDebugLoc(), Ptr.getValueType(),
+ Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
+
+ int EltNo = (Offset - StartOffset) >> 2;
+ int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
+ EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
+ SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0);
+ // Canonicalize it to a v4i32 shuffle.
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getVectorShuffle(MVT::v4i32, dl, V1,
+ DAG.getUNDEF(MVT::v4i32), &Mask[0]));
+ }
+
+ return SDValue();
+}
+
+SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
// All zero's are handled with pxor, all one's are handled with pcmpeqd.
@@ -3486,8 +3572,19 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
- if (Values.size() == 1)
+ if (Values.size() == 1) {
+ if (EVTBits == 32) {
+ // Instead of a shuffle like this:
+ // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
+ // Check if it's possible to issue this instead.
+ // shuffle (vload ptr), undef, <1, 1, 1, 1>
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue Item = Op.getOperand(Idx);
+ if (Op.getNode()->isOnlyUserOf(Item.getNode()))
+ return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
+ }
return SDValue();
+ }
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
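The new code path above, together with LowerAsSplatVectorLoad, turns a splat of one loaded 32-bit element into a single 16-byte vector load plus a shuffle whose mask selects the element the pointer offset picks out, instead of a scalar load splatted from lane 0. A hedged intrinsics sketch of the shape of the output (hypothetical helper; the element index is fixed at 1 to mirror the comment above):

#include <xmmintrin.h>  // SSE

// Splat buf[1] into all four lanes; buf is assumed 16-byte aligned,
// matching the stack-object realignment the lowering performs.
__m128 splat_elt1(const float *buf) {
  __m128 v = _mm_load_ps(buf);                          // one vector load
  return _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1)); // <1,1,1,1> splat
}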
@@ -4278,7 +4375,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
unsigned ShAmt = 0;
SDValue ShVal;
bool isShift = getSubtarget()->hasSSE2() &&
- isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+ isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
@@ -4815,6 +4912,7 @@ static SDValue
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
DebugLoc dl = GA->getDebugLoc();
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
@@ -4828,6 +4926,10 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
SDValue Ops[] = { Chain, TGA };
Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2);
}
+
+ // TLSADDR will be codegen'ed as a call. Inform MFI that the function has calls.
+ MFI->setHasCalls(true);
+
SDValue Flag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
}
@@ -5648,6 +5750,17 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
return SDValue();
SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG);
+
+ // Use sbb x, x to materialize carry bit into a GPR.
+ // FIXME: Temporarily disabled since it breaks self-hosting. It's apparently
+ // miscompiling ARMISelDAGToDAG.cpp.
+ if (0 && !isFP && X86CC == X86::COND_B) {
+ return DAG.getNode(ISD::AND, dl, MVT::i8,
+ DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), Cond),
+ DAG.getConstant(1, MVT::i8));
+ }
+
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(X86CC, MVT::i8), Cond);
}
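Although the combine above is disabled behind if (0 && ...), the identity it relies on is straightforward: after a compare that sets the carry flag, sbb x, x computes x - x - CF, which is all-ones when CF is set and zero otherwise, and the trailing AND with 1 recovers the boolean. A small portable sketch of that identity (names are mine, not LLVM's):

#include <cassert>
#include <cstdint>

uint32_t setcc_via_carry(uint32_t a, uint32_t b) {
  uint32_t cf  = a < b ? 1u : 0u; // carry flag after an unsigned cmp a, b
  uint32_t sbb = 0u - cf;         // sbb x, x: 0xFFFFFFFF if CF, else 0
  return sbb & 1u;                // the AND ..., 1 from the combine above
}

int main() {
  assert(setcc_via_carry(1, 2) == 1);
  assert(setcc_via_carry(2, 1) == 0);
  return 0;
}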
@@ -5800,9 +5913,18 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
Cond = NewCond;
}
+ // Look past (and (setcc_carry (cmp ...)), 1).
+ if (Cond.getOpcode() == ISD::AND &&
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (C && C->getAPIntValue() == 1)
+ Cond = Cond.getOperand(0);
+ }
+
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
- if (Cond.getOpcode() == X86ISD::SETCC) {
+ if (Cond.getOpcode() == X86ISD::SETCC ||
+ Cond.getOpcode() == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
@@ -5885,9 +6007,18 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
Cond = LowerXALUO(Cond, DAG);
#endif
+ // Look past (and (setcc_carry (cmp ...)), 1).
+ if (Cond.getOpcode() == ISD::AND &&
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (C && C->getAPIntValue() == 1)
+ Cond = Cond.getOperand(0);
+ }
+
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
- if (Cond.getOpcode() == X86ISD::SETCC) {
+ if (Cond.getOpcode() == X86ISD::SETCC ||
+ Cond.getOpcode() == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
@@ -7274,6 +7405,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::COMI: return "X86ISD::COMI";
case X86ISD::UCOMI: return "X86ISD::UCOMI";
case X86ISD::SETCC: return "X86ISD::SETCC";
+ case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
case X86ISD::CMOV: return "X86ISD::CMOV";
case X86ISD::BRCOND: return "X86ISD::BRCOND";
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
@@ -8327,16 +8459,6 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
-static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
- const TargetLowering &TLI) {
- GlobalValue *GV;
- int64_t Offset = 0;
- if (TLI.isGAPlusOffset(Base, GV, Offset))
- return (GV->getAlignment() >= N && (Offset % N) == 0);
- // DAG combine handles the stack object case.
- return false;
-}
-
static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
EVT EltVT, LoadSDNode *&LDBase,
unsigned &LastLoadedElt,
@@ -8366,7 +8488,7 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
continue;
LoadSDNode *LD = cast<LoadSDNode>(Elt);
- if (!TLI.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i, MFI))
+ if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
return false;
LastLoadedElt = i;
}
@@ -8399,7 +8521,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
if (LastLoadedElt == NumElems - 1) {
- if (isBaseAlignmentOfN(16, LD->getBasePtr().getNode(), TLI))
+ if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16)
return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
LD->getSrcValue(), LD->getSrcValueOffset(),
LD->isVolatile());
@@ -8858,11 +8980,42 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
+ // since the result of setcc_c is all zero's or all ones.
+ if (N1C && N0.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == X86ISD::SETCC_CARRY ||
+ ((N00.getOpcode() == ISD::ANY_EXTEND ||
+ N00.getOpcode() == ISD::ZERO_EXTEND) &&
+ N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) {
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ APInt ShAmt = N1C->getAPIntValue();
+ Mask = Mask.shl(ShAmt);
+ if (Mask != 0)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ N00, DAG.getConstant(Mask, VT));
+ }
+ }
+
+ return SDValue();
+}
/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
/// when possible.
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector() && VT.isInteger() &&
+ N->getOpcode() == ISD::SHL)
+ return PerformSHLCombine(N, DAG);
+
// On X86 with SSE2 support, we can transform this to a vector shift if
// all elements are shifted by the same amount. We can't do this in legalize
// because the a constant vector is typically transformed to a constant pool
@@ -8870,7 +9023,6 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
if (!Subtarget->hasSSE2())
return SDValue();
- EVT VT = N->getValueType(0);
if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
return SDValue();
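PerformSHLCombine above is justified by the fact that setcc_carry only ever produces all-zeros or all-ones, and on such values masking and shifting commute: (x & c1) << c2 == x & (c1 << c2). A quick check of that algebra over both possible inputs (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t vals[2] = { 0u, ~0u };  // the only setcc_carry results
  const uint32_t c1 = 0x0Fu, c2 = 3u;
  for (int i = 0; i != 2; ++i) {
    uint32_t x = vals[i];
    assert(((x & c1) << c2) == (x & (c1 << c2)));
  }
  return 0;
}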
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 7b4ab62..64bc70c 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -118,6 +118,10 @@ namespace llvm {
/// operand produced by a CMP instruction.
SETCC,
+ // Same as SETCC except it's materialized with an sbb, and the value is
+ // all ones or all zeros.
+ SETCC_CARRY,
+
/// X86 conditional moves. Operand 0 and operand 1 are the two values
/// to select from. Operand 2 is the condition code, and operand 3 is the
/// flag operand produced by a CMP or TEST instruction. It also writes a
@@ -626,7 +630,9 @@ namespace llvm {
std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
bool isSigned);
-
+
+ SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+ SelectionDAG &DAG);
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG);
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG);
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index b5fa862..b6a2c05 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1333,6 +1333,15 @@ def CMOVNO64rm : RI<0x41, MRMSrcMem, // if !overflow, GR64 = [mem64]
X86_COND_NO, EFLAGS))]>, TB;
} // isTwoAddress
+// Use sbb to materialize carry flag into a GPR.
+let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in
+def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins),
+ "sbb{q}\t$dst, $dst",
+ [(set GR64:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>;
+
+def : Pat<(i64 (anyext (X86setcc_c X86_COND_B, EFLAGS))),
+ (SETB_C64r)>;
+
//===----------------------------------------------------------------------===//
// Conversion Instructions...
//
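SETB_C64r, like the 8/16/32-bit variants added to X86InstrInfo.td below, materializes the carry flag with an sbb of a register against itself, so every bit of the destination ends up defined. That is why the anyext pattern above can map straight to the same instruction: there are no undefined high bits to worry about. A hedged sketch of the 64-bit semantics:

#include <cassert>
#include <cstdint>

// What "sbb %rax, %rax" leaves in rax, as a function of the carry flag.
uint64_t setb_c64(bool carry) {
  return 0ull - (carry ? 1ull : 0ull); // rax - rax - CF
}

int main() {
  assert(setb_c64(true)  == ~0ull); // all 64 bits set
  assert(setb_c64(false) == 0ull);  // all clear
  return 0;
}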
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index a37013d..1947d35 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -783,12 +783,14 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
return Reg;
// Check for post-frame index elimination operations
- return hasLoadFromStackSlot(MI, FrameIndex);
+ const MachineMemOperand *Dummy;
+ return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}
return 0;
}
bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
int &FrameIndex) const {
for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
oe = MI->memoperands_end();
@@ -798,6 +800,7 @@ bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
if (const FixedStackPseudoSourceValue *Value =
dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
FrameIndex = Value->getFrameIndex();
+ MMO = *o;
return true;
}
}
@@ -819,12 +822,14 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
return Reg;
// Check for post-frame index elimination operations
- return hasStoreToStackSlot(MI, FrameIndex);
+ const MachineMemOperand *Dummy;
+ return hasStoreToStackSlot(MI, Dummy, FrameIndex);
}
return 0;
}
bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
int &FrameIndex) const {
for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
oe = MI->memoperands_end();
@@ -834,6 +839,7 @@ bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
if (const FixedStackPseudoSourceValue *Value =
dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
FrameIndex = Value->getFrameIndex();
+ MMO = *o;
return true;
}
}
@@ -1052,6 +1058,112 @@ static bool hasLiveCondCodeDef(MachineInstr *MI) {
return false;
}
+/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when
+/// 16-bit LEA is disabled: it uses a 32-bit LEA to form 3-address code by
+/// promoting to a 32-bit superregister and then truncating back down to a
+/// 16-bit subregister.
+MachineInstr *
+X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
+ MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ MachineInstr *MI = MBBI;
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isKill = MI->getOperand(1).isKill();
+
+ unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::LEA64_32r : X86::LEA32r;
+ MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
+ unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+
+ // Build and insert into an implicit UNDEF value. This is OK because
+ // we'll be shifting and then extracting the lower 16 bits.
+ // This has the potential to cause a partial register stall, e.g.:
+ // movw (%rbp,%rcx,2), %dx
+ // leal -65(%rdx), %esi
+ // But testing has shown this *does* help performance in 64-bit mode (at
+ // least on modern x86 machines).
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
+ MachineInstr *InsMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
+ .addReg(leaInReg)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(X86::SUBREG_16BIT);
+
+ MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
+ get(Opc), leaOutReg);
+ switch (MIOpc) {
+ default:
+ llvm_unreachable(0);
+ break;
+ case X86::SHL16ri: {
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ MIB.addReg(0).addImm(1 << ShAmt)
+ .addReg(leaInReg, RegState::Kill).addImm(0);
+ break;
+ }
+ case X86::INC16r:
+ case X86::INC64_16r:
+ addLeaRegOffset(MIB, leaInReg, true, 1);
+ break;
+ case X86::DEC16r:
+ case X86::DEC64_16r:
+ addLeaRegOffset(MIB, leaInReg, true, -1);
+ break;
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
+ break;
+ case X86::ADD16rr: {
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+ unsigned leaInReg2 = 0;
+ MachineInstr *InsMI2 = 0;
+ if (Src == Src2) {
+ // ADD16rr %reg1028<kill>, %reg1028: the two sources match, so
+ // just a single insert_subreg is needed.
+ addRegReg(MIB, leaInReg, true, leaInReg, false);
+ } else {
+ leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ // Build and insert into an implicit UNDEF value. This is OK because
+ // we'll be shifting and then extracting the lower 16 bits.
+ BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+ InsMI2 =
+ BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2)
+ .addReg(leaInReg2)
+ .addReg(Src2, getKillRegState(isKill2))
+ .addImm(X86::SUBREG_16BIT);
+ addRegReg(MIB, leaInReg, true, leaInReg2, true);
+ }
+ if (LV && isKill2 && InsMI2)
+ LV->replaceKillInstruction(Src2, MI, InsMI2);
+ break;
+ }
+ }
+
+ MachineInstr *NewMI = MIB;
+ MachineInstr *ExtMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(leaOutReg, RegState::Kill)
+ .addImm(X86::SUBREG_16BIT);
+
+ if (LV) {
+ // Update live variables
+ LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
+ LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
+ if (isKill)
+ LV->replaceKillInstruction(Src, MI, InsMI);
+ if (isDead)
+ LV->replaceKillInstruction(Dest, MI, ExtMI);
+ }
+
+ return ExtMI;
+}
+
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
/// may be able to convert a two-address instruction into a true
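The widening trick in convertToThreeAddressWithLEA is sound because the 16-bit operations it handles (shift, inc, dec, add) depend only on the low 16 bits of their inputs, so computing in 32 bits and truncating yields the same result. A small sketch of that equivalence for the ADD16ri case (hypothetical helper name):

#include <cassert>
#include <cstdint>

// Models: INSERT_SUBREG into a GR32, 32-bit LEA add, EXTRACT_SUBREG low 16.
uint16_t add16_via_lea32(uint16_t Src, int32_t Imm) {
  uint32_t leaIn  = Src;                   // low 16 bits hold Src
  uint32_t leaOut = leaIn + (uint32_t)Imm; // the LEA performs the add
  return (uint16_t)leaOut;                 // truncate back to 16 bits
}

int main() {
  assert(add16_via_lea32(0xFFFF, 1) == 0); // wraps exactly like ADD16ri
  assert(add16_via_lea32(10, -1) == 9);    // matches the DEC16r case
  return 0;
}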
@@ -1077,7 +1189,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineInstr *NewMI = NULL;
// FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
// we have better subtarget support, enable the 16-bit LEA generation here.
+ // 16-bit LEA is also slow on Core2.
bool DisableLEA16 = true;
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
unsigned MIOpc = MI->getOpcode();
switch (MIOpc) {
@@ -1116,8 +1230,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned ShAmt = MI->getOperand(2).getImm();
if (ShAmt == 0 || ShAmt >= 4) return 0;
- unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() ?
- X86::LEA64_32r : X86::LEA32r;
+ unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
.addReg(0).addImm(1 << ShAmt)
@@ -1131,51 +1244,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned ShAmt = MI->getOperand(2).getImm();
if (ShAmt == 0 || ShAmt >= 4) return 0;
- if (DisableLEA16) {
- // If 16-bit LEA is disabled, use 32-bit LEA via subregisters.
- MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
- unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
- ? X86::LEA64_32r : X86::LEA32r;
- unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
- unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
-
- // Build and insert into an implicit UNDEF value. This is OK because
- // well be shifting and then extracting the lower 16-bits.
- BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
- MachineInstr *InsMI =
- BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
- .addReg(leaInReg)
- .addReg(Src, getKillRegState(isKill))
- .addImm(X86::SUBREG_16BIT);
-
- NewMI = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg)
- .addReg(0).addImm(1 << ShAmt)
- .addReg(leaInReg, RegState::Kill)
- .addImm(0);
-
- MachineInstr *ExtMI =
- BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
- .addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(leaOutReg, RegState::Kill)
- .addImm(X86::SUBREG_16BIT);
-
- if (LV) {
- // Update live variables
- LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
- LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
- if (isKill)
- LV->replaceKillInstruction(Src, MI, InsMI);
- if (isDead)
- LV->replaceKillInstruction(Dest, MI, ExtMI);
- }
- return ExtMI;
- } else {
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(0).addImm(1 << ShAmt)
- .addReg(Src, getKillRegState(isKill))
- .addImm(0);
- }
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(0);
break;
}
default: {
@@ -1185,7 +1260,6 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (hasLiveCondCodeDef(MI))
return 0;
- bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
switch (MIOpc) {
default: return 0;
case X86::INC64r:
@@ -1202,7 +1276,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
}
case X86::INC16r:
case X86::INC64_16r:
- if (DisableLEA16) return 0;
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
.addReg(Dest, RegState::Define |
@@ -1223,7 +1298,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
}
case X86::DEC16r:
case X86::DEC64_16r:
- if (DisableLEA16) return 0;
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
.addReg(Dest, RegState::Define |
@@ -1246,7 +1322,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
break;
}
case X86::ADD16rr: {
- if (DisableLEA16) return 0;
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Src2 = MI->getOperand(2).getReg();
bool isKill2 = MI->getOperand(2).isKill();
@@ -1261,56 +1338,32 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD64ri32:
case X86::ADD64ri8:
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- if (MI->getOperand(2).isImm())
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, MI->getOperand(2).getImm());
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
break;
case X86::ADD32ri:
- case X86::ADD32ri8:
+ case X86::ADD32ri8: {
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- if (MI->getOperand(2).isImm()) {
- unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
+ unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
Src, isKill, MI->getOperand(2).getImm());
- }
break;
+ }
case X86::ADD16ri:
case X86::ADD16ri8:
- if (DisableLEA16) return 0;
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- if (MI->getOperand(2).isImm())
- NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, MI->getOperand(2).getImm());
- break;
- case X86::SHL16ri:
- if (DisableLEA16) return 0;
- case X86::SHL32ri:
- case X86::SHL64ri: {
- assert(MI->getNumOperands() >= 3 && MI->getOperand(2).isImm() &&
- "Unknown shl instruction!");
- unsigned ShAmt = MI->getOperand(2).getImm();
- if (ShAmt == 1 || ShAmt == 2 || ShAmt == 3) {
- X86AddressMode AM;
- AM.Scale = 1 << ShAmt;
- AM.IndexReg = Src;
- unsigned Opc = MIOpc == X86::SHL64ri ? X86::LEA64r
- : (MIOpc == X86::SHL32ri
- ? (is64Bit ? X86::LEA64_32r : X86::LEA32r) : X86::LEA16r);
- NewMI = addFullAddress(BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)), AM);
- if (isKill)
- NewMI->getOperand(3).setIsKill(true);
- }
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
break;
}
- }
}
}
@@ -1571,14 +1624,17 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I = MBB.end();
while (I != MBB.begin()) {
--I;
- // Working from the bottom, when we see a non-terminator
- // instruction, we're done.
+
+ // Working from the bottom, when we see a non-terminator instruction, we're
+ // done.
if (!isBrAnalysisUnpredicatedTerminator(I, *this))
break;
- // A terminator that isn't a branch can't easily be handled
- // by this analysis.
+
+ // A terminator that isn't a branch can't easily be handled by this
+ // analysis.
if (!I->getDesc().isBranch())
return true;
+
// Handle unconditional branches.
if (I->getOpcode() == X86::JMP) {
if (!AllowModify) {
@@ -1587,10 +1643,12 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
}
// If the block has any instructions after a JMP, delete them.
- while (next(I) != MBB.end())
- next(I)->eraseFromParent();
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+
Cond.clear();
FBB = 0;
+
// Delete the JMP if it's equivalent to a fall-through.
if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
TBB = 0;
@@ -1598,14 +1656,17 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
I = MBB.end();
continue;
}
+
// TBB is used to indicate the unconditional destination.
TBB = I->getOperand(0).getMBB();
continue;
}
+
// Handle conditional branches.
X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
if (BranchCode == X86::COND_INVALID)
return true; // Can't handle indirect branch.
+
// Working from the bottom, handle the first conditional branch.
if (Cond.empty()) {
FBB = TBB;
@@ -1613,24 +1674,26 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
Cond.push_back(MachineOperand::CreateImm(BranchCode));
continue;
}
- // Handle subsequent conditional branches. Only handle the case
- // where all conditional branches branch to the same destination
- // and their condition opcodes fit one of the special
- // multi-branch idioms.
+
+ // Handle subsequent conditional branches. Only handle the case where all
+ // conditional branches branch to the same destination and their condition
+ // opcodes fit one of the special multi-branch idioms.
assert(Cond.size() == 1);
assert(TBB);
- // Only handle the case where all conditional branches branch to
- // the same destination.
+
+ // Only handle the case where all conditional branches branch to the same
+ // destination.
if (TBB != I->getOperand(0).getMBB())
return true;
- X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
+
// If the conditions are the same, we can leave them alone.
+ X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
if (OldBranchCode == BranchCode)
continue;
- // If they differ, see if they fit one of the known patterns.
- // Theoretically we could handle more patterns here, but
- // we shouldn't expect to see them if instruction selection
- // has done a reasonable job.
+
+ // If they differ, see if they fit one of the known patterns. Theoretically,
+ // we could handle more patterns here, but we shouldn't expect to see them
+ // if instruction selection has done a reasonable job.
if ((OldBranchCode == X86::COND_NP &&
BranchCode == X86::COND_E) ||
(OldBranchCode == X86::COND_E &&
@@ -1643,6 +1706,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
BranchCode = X86::COND_NE_OR_P;
else
return true;
+
// Update the MachineOperand.
Cond[0].setImm(BranchCode);
}
@@ -2713,27 +2777,6 @@ unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
return I->second.first;
}
-bool X86InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case X86::TCRETURNri:
- case X86::TCRETURNdi:
- case X86::RET: // Return.
- case X86::RETI:
- case X86::TAILJMPd:
- case X86::TAILJMPr:
- case X86::TAILJMPm:
- case X86::JMP: // Uncond branch.
- case X86::JMP32r: // Indirect branch.
- case X86::JMP64r: // Indirect branch (64-bit).
- case X86::JMP32m: // Indirect branch through mem.
- case X86::JMP64m: // Indirect branch through mem (64-bit).
- return true;
- default: return false;
- }
-}
-
bool X86InstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 1 && "Invalid X86 branch condition!");
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index c6daa25..b83441d 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -457,11 +457,14 @@ public:
/// hasLoadFromStackSlot - If the specified machine instruction has
/// a load from a stack slot, return true along with the FrameIndex
- /// of the loaded stack slot. If not, return false. Unlike
+ /// of the loaded stack slot and the machine mem operand containing
+ /// the reference. If not, return false. Unlike
/// isLoadFromStackSlot, this returns true for any instruction that
/// loads from the stack. This is a hint only and may not catch all
/// cases.
- bool hasLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ bool hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
/// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
@@ -472,11 +475,13 @@ public:
/// hasStoreToStackSlot - If the specified machine instruction has a
/// store to a stack slot, return true along with the FrameIndex of
- /// the loaded stack slot. If not, return false. Unlike
- /// isStoreToStackSlot, this returns true for any instructions that
- /// loads from the stack. This is a hint only and may not catch all
- /// cases.
- bool hasStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ /// the stored stack slot and the machine mem operand containing the
+ /// reference. If not, return false. Unlike isStoreToStackSlot,
+ /// this returns true for any instruction that stores to the
+ /// stack. This is a hint only and may not catch all cases.
+ bool hasStoreToStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
AliasAnalysis *AA) const;
@@ -595,7 +600,6 @@ public:
bool UnfoldLoad, bool UnfoldStore,
unsigned *LoadRegIndex = 0) const;
- virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
@@ -633,6 +637,11 @@ public:
unsigned getGlobalBaseReg(MachineFunction *MF) const;
private:
+ MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
+ MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
unsigned OpNum,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 90ef1f4..3cc1853 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -87,6 +87,7 @@ def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>;
def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
[SDNPHasChain]>;
def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
+def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC>;
def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
[SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
@@ -816,7 +817,7 @@ def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
let neverHasSideEffects = 1 in
def LEA16r : I<0x8D, MRMSrcMem,
- (outs GR16:$dst), (ins i32mem:$src),
+ (outs GR16:$dst), (ins lea32mem:$src),
"lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
let isReMaterializable = 1 in
def LEA32r : I<0x8D, MRMSrcMem,
@@ -3059,6 +3060,21 @@ let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags
let Uses = [EFLAGS] in {
+// Use sbb to materialize carry bit.
+
+let Defs = [EFLAGS], isCodeGenOnly = 1 in {
+def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins),
+ "sbb{b}\t$dst, $dst",
+ [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins),
+ "sbb{w}\t$dst, $dst",
+ [(set GR16:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>,
+ OpSize;
+def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins),
+ "sbb{l}\t$dst, $dst",
+ [(set GR32:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>;
+} // isCodeGenOnly
+
def SETEr : I<0x94, MRM0r,
(outs GR8 :$dst), (ins),
"sete\t$dst",
@@ -4169,6 +4185,12 @@ def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1),
GR16:$src2, (i8 imm:$amt2)), addr:$dst),
(SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
+// (anyext (setcc_carry)) -> (zext (setcc_carry))
+def : Pat<(i16 (anyext (X86setcc_c X86_COND_B, EFLAGS))),
+ (SETB_C16r)>;
+def : Pat<(i32 (anyext (X86setcc_c X86_COND_B, EFLAGS))),
+ (SETB_C32r)>;
+
//===----------------------------------------------------------------------===//
// EFLAGS-defining Patterns
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index dfdd4ce..62841f8 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2083,7 +2083,7 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
"pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v4i32 (pshufd:$src2
- (bc_v4i32(memopv2i64 addr:$src1)),
+ (bc_v4i32 (memopv2i64 addr:$src1)),
(undef))))]>;
}
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 33852bd..d96aafd 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -423,21 +423,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
- unsigned MaxAlign = 0;
-
- for (int i = FFI->getObjectIndexBegin(),
- e = FFI->getObjectIndexEnd(); i != e; ++i) {
- if (FFI->isDeadObjectIndex(i))
- continue;
-
- unsigned Align = FFI->getObjectAlignment(i);
- MaxAlign = std::max(MaxAlign, Align);
- }
-
- return MaxAlign;
-}
-
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
@@ -638,10 +623,7 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Calculate and set max stack object alignment early, so we can decide
// whether we will need stack realignment (and thus FP).
- unsigned MaxAlign = std::max(MFI->getMaxAlignment(),
- calculateMaxStackAlignment(MFI));
-
- MFI->setMaxAlignment(MaxAlign);
+ MFI->calculateMaxStackAlignment();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
@@ -741,7 +723,7 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB,
if (MBBI == MBB.end()) return;
- MachineBasicBlock::iterator NI = next(MBBI);
+ MachineBasicBlock::iterator NI = llvm::next(MBBI);
if (NI == MBB.end()) return;
unsigned Opc = NI->getOpcode();
@@ -775,7 +757,7 @@ static int mergeSPUpdates(MachineBasicBlock &MBB,
return 0;
MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
- MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : next(MBBI);
+ MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
unsigned Opc = PI->getOpcode();
int Offset = 0;
@@ -1001,7 +983,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
}
// Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
I != E; ++I)
I->addLiveIn(FramePtr);
@@ -1482,45 +1464,3 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
}
#include "X86GenRegisterInfo.inc"
-
-namespace {
- struct MSAC : public MachineFunctionPass {
- static char ID;
- MSAC() : MachineFunctionPass(&ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF) {
- MachineFrameInfo *FFI = MF.getFrameInfo();
- MachineRegisterInfo &RI = MF.getRegInfo();
-
- // Calculate max stack alignment of all already allocated stack objects.
- unsigned MaxAlign = calculateMaxStackAlignment(FFI);
-
- // Be over-conservative: scan over all vreg defs and find, whether vector
- // registers are used. If yes - there is probability, that vector register
- // will be spilled and thus stack needs to be aligned properly.
- for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
- RegNum < RI.getLastVirtReg(); ++RegNum)
- MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
-
- if (FFI->getMaxAlignment() == MaxAlign)
- return false;
-
- FFI->setMaxAlignment(MaxAlign);
- return true;
- }
-
- virtual const char *getPassName() const {
- return "X86 Maximal Stack Alignment Calculator";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-
- char MSAC::ID = 0;
-}
-
-FunctionPass*
-llvm::createX86MaxStackAlignmentCalculatorPass() { return new MSAC(); }
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 661f560..75cdbad 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -367,5 +367,5 @@ bool X86Subtarget::enablePostRAScheduler(
RegClassVector& CriticalPathRCs) const {
Mode = TargetSubtarget::ANTIDEP_CRITICAL;
CriticalPathRCs.clear();
- return OptLevel >= CodeGenOpt::Default;
+ return OptLevel >= CodeGenOpt::Aggressive;
}
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 0cda8bc..0152121 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -163,7 +163,7 @@ bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
// Calculate and set max stack object alignment early, so we can decide
// whether we will need stack realignment (and thus FP).
- PM.add(createX86MaxStackAlignmentCalculatorPass());
+ PM.add(createMaxStackAlignmentCalculatorPass());
return false; // -print-machineinstr shouldn't print after this.
}