Diffstat (limited to 'contrib/llvm/lib/CodeGen/SelectionDAG')
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp            1211
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp                 84
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp      1
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp            141
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp            1056
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp       33
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp    426
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp            49
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h              34
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp     66
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp         4
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp     325
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h              2
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp          12
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp          51
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp      1969
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp      333
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h         35
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp            684
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp     850
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h        14
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp        791
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp      16
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp          669
24 files changed, 5559 insertions, 3297 deletions
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c9c4d91..9035602 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -43,6 +42,7 @@ STATISTIC(NodesCombined , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
+STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
namespace {
static cl::opt<bool>
@@ -185,7 +185,7 @@ namespace {
SDValue visitANY_EXTEND(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
SDValue visitTRUNCATE(SDNode *N);
- SDValue visitBIT_CONVERT(SDNode *N);
+ SDValue visitBITCAST(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
SDValue visitFADD(SDNode *N);
SDValue visitFSUB(SDNode *N);
@@ -229,12 +229,13 @@ namespace {
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
- SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT);
+ SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildUDIV(SDNode *N);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
+ SDValue TransformFPLoadStorePair(SDNode *N);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
@@ -248,16 +249,19 @@ namespace {
bool isAlias(SDValue Ptr1, int64_t Size1,
const Value *SrcValue1, int SrcValueOffset1,
unsigned SrcValueAlign1,
+ const MDNode *TBAAInfo1,
SDValue Ptr2, int64_t Size2,
const Value *SrcValue2, int SrcValueOffset2,
- unsigned SrcValueAlign2) const;
+ unsigned SrcValueAlign2,
+ const MDNode *TBAAInfo2) const;
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool FindAliasInfo(SDNode *N,
SDValue &Ptr, int64_t &Size,
const Value *&SrcValue, int &SrcValueOffset,
- unsigned &SrcValueAlignment) const;
+ unsigned &SrcValueAlignment,
+ const MDNode *&TBAAInfo) const;
/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
/// looking for a better chain (aliasing node.)
@@ -270,15 +274,15 @@ namespace {
/// Run - runs the dag combiner on all nodes in the work list
void Run(CombineLevel AtLevel);
-
+
SelectionDAG &getDAG() const { return DAG; }
-
+
/// getShiftAmountTy - Returns a type large enough to hold any valid
/// shift amount - before type legalization these can be huge.
EVT getShiftAmountTy() {
return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();
}
-
+
/// isTypeLegal - This method returns true if we are running before type
/// legalization or if the specified VT is legal.
bool isTypeLegal(const EVT &VT) {
@@ -631,7 +635,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
// Replace the old value with the new one.
++NodesCombined;
- DEBUG(dbgs() << "\nReplacing.2 ";
+ DEBUG(dbgs() << "\nReplacing.2 ";
TLO.Old.getNode()->dump(&DAG);
dbgs() << "\nWith: ";
TLO.New.getNode()->dump(&DAG);
@@ -666,12 +670,13 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
- ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
: LD->getExtensionType();
Replace = true;
- return DAG.getExtLoad(ExtType, PVT, dl,
+ return DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
- LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->getPointerInfo(),
MemVT, LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
}
@@ -691,7 +696,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
unsigned ExtOpc =
Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
return DAG.getNode(ExtOpc, dl, PVT, Op);
- }
+ }
}
if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
@@ -889,11 +894,12 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
- ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
: LD->getExtensionType();
- SDValue NewLD = DAG.getExtLoad(ExtType, PVT, dl,
+ SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
- LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->getPointerInfo(),
MemVT, LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
@@ -975,7 +981,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
- DEBUG(dbgs() << "\nReplacing.3 ";
+ DEBUG(dbgs() << "\nReplacing.3 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
RV.getNode()->dump(&DAG);
@@ -1054,7 +1060,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::TRUNCATE: return visitTRUNCATE(N);
- case ISD::BIT_CONVERT: return visitBIT_CONVERT(N);
+ case ISD::BITCAST: return visitBITCAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
case ISD::FADD: return visitFADD(N);
case ISD::FSUB: return visitFSUB(N);
@@ -1225,7 +1231,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
}
}
}
-
+
SDValue Result;
// If we've change things around then replace token factor.
@@ -1424,6 +1430,29 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
N0.getOperand(0).getOperand(1),
N0.getOperand(1)));
+ if (N1.getOpcode() == ISD::AND) {
+ SDValue AndOp0 = N1.getOperand(0);
+ ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
+ unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
+ unsigned DestBits = VT.getScalarType().getSizeInBits();
+
+ // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
+ // and similar xforms where the inner op is either ~0 or 0.
+ if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
+ DebugLoc DL = N->getDebugLoc();
+ return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
+ }
+ }
+
+ // add (sext i1), X -> sub X, (zext i1)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N0.getOperand(0).getValueType() == MVT::i1 &&
+ !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
+ DebugLoc DL = N->getDebugLoc();
+ SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
+ }
+
return SDValue();
}
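
The two folds added above rest on a small integer identity; a standalone C++ sketch (illustrative only, not the SelectionDAG API):

    #include <cassert>
    #include <cstdint>

    // add x, (sext i1 b)  ==  sub x, (zext i1 b): the sign extension of an
    // i1 is 0 or -1, the zero extension is 0 or 1, so the forms agree.
    int32_t add_sext(int32_t x, bool b) { return x + (b ? -1 : 0); }
    int32_t sub_zext(int32_t x, bool b) { return x - (b ? 1 : 0); }

    int main() {
      for (int32_t x : {-7, 0, 42})
        for (bool b : {false, true})
          assert(add_sext(x, b) == sub_zext(x, b));
      return 0;
    }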
@@ -1438,7 +1467,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
if (N->hasNUsesOfValue(0, 1))
return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
DAG.getNode(ISD::CARRY_FALSE,
- N->getDebugLoc(), MVT::Flag));
+ N->getDebugLoc(), MVT::Glue));
// canonicalize constant to RHS.
if (N0C && !N1C)
@@ -1447,7 +1476,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
// fold (addc x, 0) -> x + no carry out
if (N1C && N1C->isNullValue())
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
- N->getDebugLoc(), MVT::Flag));
+ N->getDebugLoc(), MVT::Glue));
// fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
APInt LHSZero, LHSOne;
@@ -1464,7 +1493,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
(LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
- N->getDebugLoc(), MVT::Flag));
+ N->getDebugLoc(), MVT::Glue));
}
return SDValue();
@@ -1489,6 +1518,22 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
return SDValue();
}
+// Since it may not be valid to emit a fold to zero for vector initializers,
+// check whether we can before folding.
+static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT,
+ SelectionDAG &DAG, bool LegalOperations) {
+ if (!VT.isVector()) {
+ return DAG.getConstant(0, VT);
+ } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
+ // Produce a vector of zeros.
+ SDValue El = DAG.getConstant(0, VT.getVectorElementType());
+ std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
+ &Ops[0], Ops.size());
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -1503,8 +1548,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
// fold (sub x, x) -> 0
+ // FIXME: Refactor this and xor and other similar operations together.
if (N0 == N1)
- return DAG.getConstant(0, N->getValueType(0));
+ return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
// fold (sub c1, c2) -> c1-c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
@@ -1515,6 +1561,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
if (N0C && N0C->isAllOnesValue())
return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+ // fold A-(A-B) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
+ return N1.getOperand(1);
// fold (A+B)-A -> B
if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
return N0.getOperand(1);
@@ -1897,6 +1946,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
// fold (mulhs x, 0) -> 0
if (N1C && N1C->isNullValue())
@@ -1910,6 +1960,22 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
+ // If the type twice as wide is legal, transform the mulhs to a wider multiply
+ // plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, getShiftAmountTy()));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
return SDValue();
}
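
The widened-multiply rewrite above computes the signed high half via a wide product; the same identity on plain scalars (a sketch, not the DAG API; i16/i32 chosen only for illustration):

    #include <cassert>
    #include <cstdint>

    // mulhs on i16 when the i32 multiply is legal: sign-extend both
    // operands, multiply wide, shift the product down by 16, truncate.
    int16_t mulhs16(int16_t a, int16_t b) {
      int32_t wide = (int32_t)a * (int32_t)b;  // SIGN_EXTEND + wide MUL
      return (int16_t)(wide >> 16);            // SRL + TRUNCATE
    }

    int main() {
      assert(mulhs16(0x4000, 0x4000) == 0x1000);  // 16384*16384 >> 16
      assert(mulhs16(-1, 1) == -1);               // high half is all ones
      return 0;
    }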
@@ -1918,6 +1984,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
// fold (mulhu x, 0) -> 0
if (N1C && N1C->isNullValue())
@@ -1929,6 +1996,22 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
+ // If the type twice as wide is legal, transform the mulhu to a wider multiply
+ // plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, getShiftAmountTy()));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
return SDValue();
}
@@ -1992,6 +2075,29 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
if (Res.getNode()) return Res;
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // If the type twice as wide is legal, transform the smul_lohi to a wider
+ // multiply plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part (result 1).
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, getShiftAmountTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part (result 0).
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
return SDValue();
}
@@ -1999,6 +2105,29 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
if (Res.getNode()) return Res;
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ // If the type twice as wide is legal, transform the umul_lohi to a wider
+ // multiply plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part (result 1).
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, getShiftAmountTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part (result 0).
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
return SDValue();
}
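
The *_LOHI expansions above produce both result halves from one wide multiply; sketched on scalars (illustrative C++, not the DAG API):

    #include <cassert>
    #include <cstdint>

    // umul_lohi on i16 via a legal i32 multiply: the low half is a plain
    // truncate of the wide product, the high half a shift plus truncate.
    void umul_lohi16(uint16_t a, uint16_t b, uint16_t &lo, uint16_t &hi) {
      uint32_t wide = (uint32_t)a * (uint32_t)b;  // ZERO_EXTEND + wide MUL
      lo = (uint16_t)wide;                        // TRUNCATE (result 0)
      hi = (uint16_t)(wide >> 16);                // SRL + TRUNCATE (result 1)
    }

    int main() {
      uint16_t lo, hi;
      umul_lohi16(0xFFFF, 0xFFFF, lo, hi);
      assert(lo == 0x0001 && hi == 0xFFFE);       // 65535^2 == 0xFFFE0001
      return 0;
    }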
@@ -2116,7 +2245,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N0Op0 = N0.getOperand(0);
APInt Mask = ~N1C->getAPIntValue();
- Mask.trunc(N0Op0.getValueSizeInBits());
+ Mask = Mask.trunc(N0Op0.getValueSizeInBits());
if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),
N0.getValueType(), N0Op0);
@@ -2198,10 +2327,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
LN0->getChain(), LN0->getBasePtr(),
- LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ LN0->getPointerInfo(), MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
AddToWorkList(N);
@@ -2221,10 +2349,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
AddToWorkList(N);
@@ -2253,18 +2381,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (ExtVT == LoadedVT &&
(!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
-
- SDValue NewLoad =
- DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(),
+
+ SDValue NewLoad =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
LN0->getChain(), LN0->getBasePtr(),
- LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->getPointerInfo(),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
AddToWorkList(N);
CombineTo(LN0, NewLoad, NewLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
-
+
// Do not change the width of a volatile load.
// Do not generate loads of non-round integer types since these can
// be expensive (and would be wrong if the type is not byte sized).
@@ -2288,12 +2416,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
AddToWorkList(NewPtr.getNode());
-
+
EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
SDValue Load =
- DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(),
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
LN0->getChain(), NewPtr,
- LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->getPointerInfo(),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
Alignment);
AddToWorkList(N);
@@ -2722,17 +2850,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
N01C->getAPIntValue(), VT));
}
// fold (xor x, x) -> 0
- if (N0 == N1) {
- if (!VT.isVector()) {
- return DAG.getConstant(0, VT);
- } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){
- // Produce a vector of zeros.
- SDValue El = DAG.getConstant(0, VT.getVectorElementType());
- std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
- &Ops[0], Ops.size());
- }
- }
+ if (N0 == N1)
+ return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
if (N0.getOpcode() == N1.getOpcode()) {
@@ -2810,7 +2929,8 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
LHS->getOperand(1), N->getOperand(1));
// Create the new shift.
- SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(),
+ SDValue NewShift = DAG.getNode(N->getOpcode(),
+ LHS->getOperand(0).getDebugLoc(),
VT, LHS->getOperand(0), N->getOperand(1));
// Create the new binop.
@@ -2850,7 +2970,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
- TruncC.trunc(TruncVT.getSizeInBits());
+ TruncC = TruncC.trunc(TruncVT.getSizeInBits());
return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
DAG.getNode(ISD::TRUNCATE,
@@ -2868,11 +2988,37 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
- if (c1 + c2 > OpSizeInBits)
+ if (c1 + c2 >= OpSizeInBits)
return DAG.getConstant(0, VT);
return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getConstant(c1 + c2, N1.getValueType()));
}
+
+ // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
+ // For this to be valid, the second form must not preserve any of the bits
+ // that are shifted out by the inner shift in the first form. This means
+ // the outer shift size must be >= the number of bits added by the ext.
+ // As a corollary, we don't care what kind of ext it is.
+ if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND) &&
+ N0.getOperand(0).getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+ if (c2 >= OpSizeInBits - InnerShiftSize) {
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT,
+ DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT,
+ N0.getOperand(0)->getOperand(0)),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+ }
+
// fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or
// (srl (and x, (shl -1, c1)), (sub c1, c2))
if (N1C && N0.getOpcode() == ISD::SRL &&
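
The bound in the shl-of-shl folds above changed from > to >= because at c1 + c2 == OpSizeInBits every bit has already been shifted out; a quick standalone C++ check:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t x = 0xFF;
      // c1 + c2 == 8: the composed shift clears all 8 bits, so fold to 0.
      assert((uint8_t)((uint8_t)(x << 5) << 3) == 0);
      // c1 + c2 < 8: consecutive shifts compose by adding the amounts.
      assert((uint8_t)((uint8_t)(x << 3) << 2) == (uint8_t)(x << 5));
      return 0;
    }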
@@ -2973,7 +3119,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (N01C && N1C) {
// Determine what the truncate's result bitsize and type would be.
EVT TruncVT =
- EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue());
+ EVT::getIntegerVT(*DAG.getContext(),
+ OpSizeInBits - N1C->getZExtValue());
// Determine the residual right-shift amount.
signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
@@ -3006,7 +3153,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
- TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
+ TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
DAG.getNode(ISD::AND, N->getDebugLoc(),
TruncVT,
@@ -3017,6 +3164,29 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
}
}
+ // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2))
+ // if c1 is equal to the number of bits the trunc removes
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (N0.getOperand(0).getOpcode() == ISD::SRL ||
+ N0.getOperand(0).getOpcode() == ISD::SRA) &&
+ N0.getOperand(0).hasOneUse() &&
+ N0.getOperand(0).getOperand(1).hasOneUse() &&
+ N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
+ EVT LargeVT = N0.getOperand(0).getValueType();
+ ConstantSDNode *LargeShiftAmt =
+ cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
+
+ if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
+ LargeShiftAmt->getZExtValue()) {
+ SDValue Amt =
+ DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
+ getShiftAmountTy());
+ SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT,
+ N0.getOperand(0).getOperand(0), Amt);
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA);
+ }
+ }
+
// Simplify, based on bits shifted out of the LHS.
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
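
A concrete instance of the new sra-of-truncate fold (standalone C++ sketch; a two's-complement host is assumed for the narrowing cast):

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t x = (int64_t)0xF23456789ABCDEF0ULL;  // negative input
      // c1 == 32 bits removed by the truncate, c2 == 3.
      int32_t narrow = (int32_t)((uint64_t)x >> 32) >> 3;  // sra(trunc(srl))
      int32_t wide   = (int32_t)(x >> (32 + 3));           // trunc(sra x, 35)
      assert(narrow == wide);
      return 0;
    }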
@@ -3065,12 +3235,33 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
- if (c1 + c2 > OpSizeInBits)
+ if (c1 + c2 >= OpSizeInBits)
return DAG.getConstant(0, VT);
return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getConstant(c1 + c2, N1.getValueType()));
}
-
+
+ // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
+ if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(0).getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+ // This is only valid if OpSizeInBits + c1 == the size of the inner shift.
+ if (c1 + OpSizeInBits == InnerShiftSize) {
+ if (c1 + c2 >= InnerShiftSize)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT,
+ DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT,
+ N0.getOperand(0)->getOperand(0),
+ DAG.getConstant(c1 + c2, ShiftCountVT)));
+ }
+ }
+
// fold (srl (shl x, c), c) -> (and x, cst2)
if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
N0.getValueSizeInBits() <= 64) {
@@ -3078,7 +3269,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getConstant(~0ULL >> ShAmt, VT));
}
-
+
// fold (srl (anyextend x), c) -> (anyextend (srl x, c))
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
@@ -3147,7 +3338,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
- TruncC.trunc(TruncVT.getSizeInBits());
+ TruncC = TruncC.trunc(TruncVT.getSizeInBits());
return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
DAG.getNode(ISD::AND, N->getDebugLoc(),
TruncVT,
@@ -3182,7 +3373,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// brcond i32 %c ...
//
// into
- //
+ //
// %a = ...
// %b = and %a, 2
// %c = setcc eq %b, 0
@@ -3422,7 +3613,7 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
}
if (BothLiveOut)
// Both unextended and extended values are live out. There had better be
- // good a reason for the transformation.
+ // a good reason for the transformation.
return ExtendNodes.size();
}
return true;
@@ -3503,10 +3694,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
N0.getValueType(),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
@@ -3547,10 +3737,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
@@ -3611,7 +3801,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
N0.getOperand(0), N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get()),
NegOne, DAG.getConstant(0, VT));
- }
+ }
// fold (sext x) -> (zext x) if the sign bit is known zero.
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
@@ -3652,6 +3842,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate x)) -> (and x, mask)
if (N0.getOpcode() == ISD::TRUNCATE &&
(!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
SDValue Op = N0.getOperand(0);
if (Op.getValueType().bitsLT(VT)) {
Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
@@ -3677,7 +3881,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- Mask.zext(VT.getSizeInBits());
+ Mask = Mask.zext(VT.getSizeInBits());
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
X, DAG.getConstant(Mask, VT));
}
@@ -3692,10 +3896,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
N0.getValueType(),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
@@ -3736,10 +3939,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
@@ -3805,21 +4008,27 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
isa<ConstantSDNode>(N0.getOperand(1)) &&
N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
N0.hasOneUse()) {
+ SDValue ShAmt = N0.getOperand(1);
+ unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
if (N0.getOpcode() == ISD::SHL) {
+ SDValue InnerZExt = N0.getOperand(0);
// If the original shl may be shifting out bits, do not perform this
// transformation.
- unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
- unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() -
- N0.getOperand(0).getOperand(0).getValueType().getSizeInBits();
- if (ShAmt > KnownZeroBits)
+ unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
+ InnerZExt.getOperand(0).getValueType().getSizeInBits();
+ if (ShAmtVal > KnownZeroBits)
return SDValue();
}
- DebugLoc dl = N->getDebugLoc();
- return DAG.getNode(N0.getOpcode(), dl, VT,
- DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)),
- DAG.getNode(ISD::ZERO_EXTEND, dl,
- N0.getOperand(1).getValueType(),
- N0.getOperand(1)));
+
+ DebugLoc DL = N->getDebugLoc();
+
+ // Ensure that the shift amount is wide enough for the shifted value.
+ if (VT.getSizeInBits() >= 256)
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
+
+ return DAG.getNode(N0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
+ ShAmt);
}
return SDValue();
@@ -3879,7 +4088,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- Mask.zext(VT.getSizeInBits());
+ Mask = Mask.zext(VT.getSizeInBits());
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
X, DAG.getConstant(Mask, VT));
}
@@ -3894,10 +4103,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
N0.getValueType(),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
@@ -3938,11 +4146,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
- SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT,
- N->getDebugLoc(),
- LN0->getChain(), LN0->getBasePtr(),
- LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(), MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
@@ -4053,11 +4259,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (Opc == ISD::SIGN_EXTEND_INREG) {
ExtType = ISD::SEXTLOAD;
ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
- if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
- return SDValue();
} else if (Opc == ISD::SRL) {
- // Annother special-case: SRL is basically zero-extending a narrower
- // value.
+ // Another special-case: SRL is basically zero-extending a narrower value.
ExtType = ISD::ZEXTLOAD;
N0 = SDValue(N, 0);
ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
@@ -4065,10 +4268,18 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
ExtVT = EVT::getIntegerVT(*DAG.getContext(),
VT.getSizeInBits() - N01->getZExtValue());
}
+ if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
+ return SDValue();
unsigned EVTBits = ExtVT.getSizeInBits();
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!ExtVT.isRound())
+ return SDValue();
+
unsigned ShAmt = 0;
- if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) {
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
ShAmt = N01->getZExtValue();
// Is the shift amount a multiple of size of VT?
@@ -4078,52 +4289,88 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
return SDValue();
}
+
+ // At this point, we must have a load or else we can't do the transform.
+ if (!isa<LoadSDNode>(N0)) return SDValue();
+
+ // If the shift amount is larger than the input type then we're not
+ // accessing any of the loaded bytes. If the load was a zextload/extload
+ // then the result of the shift+trunc is zero/undef (handled elsewhere).
+ // If the load was a sextload then the result is a splat of the sign bit
+ // of the extended byte. This is not worth optimizing for.
+ if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
+ return SDValue();
}
}
- // Do not generate loads of non-round integer types since these can
- // be expensive (and would be wrong if the type is not byte sized).
- if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() &&
- cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits &&
- // Do not change the width of a volatile load.
- !cast<LoadSDNode>(N0)->isVolatile()) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- EVT PtrType = N0.getOperand(1).getValueType();
-
- // For big endian targets, we need to adjust the offset to the pointer to
- // load the correct bytes.
- if (TLI.isBigEndian()) {
- unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
- unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
- ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
- }
-
- uint64_t PtrOff = ShAmt / 8;
- unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
- SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
- PtrType, LN0->getBasePtr(),
- DAG.getConstant(PtrOff, PtrType));
- AddToWorkList(NewPtr.getNode());
-
- SDValue Load = (ExtType == ISD::NON_EXTLOAD)
- ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
- LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
- LN0->isVolatile(), LN0->isNonTemporal(), NewAlign)
- : DAG.getExtLoad(ExtType, VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
- LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
- ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
- NewAlign);
-
- // Replace the old load's chain with the new load's chain.
- WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
- &DeadNodes);
+ // If the load is shifted left (and the result isn't shifted back right),
+ // we can fold the truncate through the shift.
+ unsigned ShLeftAmt = 0;
+ if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
+ ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShLeftAmt = N01->getZExtValue();
+ N0 = N0.getOperand(0);
+ }
+ }
+
+ // If we haven't found a load, we can't narrow it. Don't transform one with
+ // multiple uses, this would require adding a new load.
+ if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() ||
+ // Don't change the width of a volatile load.
+ cast<LoadSDNode>(N0)->isVolatile())
+ return SDValue();
+
+ // Verify that we are actually reducing a load width here.
+ if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits)
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT PtrType = N0.getOperand(1).getValueType();
+
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (TLI.isBigEndian()) {
+ unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
+ ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
+ }
+
+ uint64_t PtrOff = ShAmt / 8;
+ unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
+ PtrType, LN0->getBasePtr(),
+ DAG.getConstant(PtrOff, PtrType));
+ AddToWorkList(NewPtr.getNode());
+
+ SDValue Load;
+ if (ExtType == ISD::NON_EXTLOAD)
+ Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(), NewAlign);
+ else
+ Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ NewAlign);
+
+ // Replace the old load's chain with the new load's chain.
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
+ &DeadNodes);
- // Return the new loaded value.
- return Load;
+ // Shift the result left, if we've swallowed a left shift.
+ SDValue Result = Load;
+ if (ShLeftAmt != 0) {
+ EVT ShImmTy = getShiftAmountTy();
+ if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
+ ShImmTy = VT;
+ Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT,
+ Result, DAG.getConstant(ShLeftAmt, ShImmTy));
}
- return SDValue();
+ // Return the new loaded value.
+ return Result;
}
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
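
The narrowing performed by ReduceLoadWidth, modeled on bytes (a sketch under a little-endian host assumption; big-endian targets adjust the offset as in the code above):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      unsigned char mem[4] = {0x78, 0x56, 0x34, 0x12};  // 0x12345678 (LE)
      uint32_t wide;
      std::memcpy(&wide, mem, 4);                  // the original i32 load
      uint16_t viaShift = (uint16_t)(wide >> 16);  // trunc(srl(load, 16))
      uint16_t narrow;
      std::memcpy(&narrow, mem + 2, 2);            // i16 load at ShAmt/8 == 2
      assert(viaShift == narrow);                  // both 0x1234
      return 0;
    }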
@@ -4196,10 +4443,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), EVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ EVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
@@ -4213,10 +4460,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), EVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ EVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
@@ -4295,7 +4542,9 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
- if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
+ LD1->getPointerInfo().getAddrSpace() !=
+ LD2->getPointerInfo().getAddrSpace())
return SDValue();
EVT LD1VT = LD1->getValueType(0);
@@ -4313,14 +4562,14 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
if (NewAlign <= Align &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
- LD1->getBasePtr(), LD1->getSrcValue(),
- LD1->getSrcValueOffset(), false, false, Align);
+ LD1->getBasePtr(), LD1->getPointerInfo(),
+ false, false, Align);
}
return SDValue();
}
-SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
+SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -4344,12 +4593,12 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
assert(!DestEltVT.isVector() &&
"Element type of vector ValueType must not be vector!");
if (isSimple)
- return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT);
+ return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
}
// If the input is a constant, let getNode fold it.
if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
- SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0);
+ SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0);
if (Res.getNode() != N) {
if (!LegalOperations ||
TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
@@ -4365,8 +4614,8 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
}
// (conv (conv x, t1), t2) -> (conv x, t2)
- if (N0.getOpcode() == ISD::BIT_CONVERT)
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT,
+ if (N0.getOpcode() == ISD::BITCAST)
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT,
N0.getOperand(0));
// fold (conv (load x)) -> (load (conv*)x)
@@ -4382,13 +4631,12 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
if (Align <= OrigAlign) {
SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
- LN0->getBasePtr(),
- LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
LN0->isVolatile(), LN0->isNonTemporal(),
OrigAlign);
AddToWorkList(N);
CombineTo(N0.getNode(),
- DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
N0.getValueType(), Load),
Load.getValue(1));
return Load;
@@ -4400,7 +4648,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
// This often reduces constant pool loads.
if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
- SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT,
+ SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
N0.getOperand(0));
AddToWorkList(NewConv.getNode());
@@ -4423,7 +4671,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
if (isTypeLegal(IntXVT)) {
- SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
IntXVT, N0.getOperand(1));
AddToWorkList(X.getNode());
@@ -4448,7 +4696,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
X, DAG.getConstant(SignBit, VT));
AddToWorkList(X.getNode());
- SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
VT, N0.getOperand(0));
Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,
Cst, DAG.getConstant(~SignBit, VT));
@@ -4473,11 +4721,11 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
return CombineConsecutiveLoads(N, VT);
}
-/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector
+/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
/// destination element value type.
SDValue DAGCombiner::
-ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
// If this is already the right type, we're done.
@@ -4495,10 +4743,10 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// Due to the FP element handling below calling this routine recursively,
// we can end up with a scalar-to-vector node here.
if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
- DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+ DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
DstEltVT, BV->getOperand(0)));
-
+
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
SDValue Op = BV->getOperand(i);
@@ -4506,7 +4754,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// are promoted and implicitly truncated. Make that explicit here.
if (Op.getValueType() != SrcEltVT)
Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
- Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
+ Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
DstEltVT, Op));
AddToWorkList(Ops.back().getNode());
}
@@ -4522,7 +4770,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// same sizes.
assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
- BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode();
+ BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
SrcEltVT = IntVT;
}
@@ -4531,10 +4779,10 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
if (DstEltVT.isFloatingPoint()) {
assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
- SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode();
+ SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
// Next, convert to FP elements of the same size.
- return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT);
+ return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
}
// Okay, we know the src/dst types are both integers of differing types.
@@ -4556,7 +4804,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
if (Op.getOpcode() == ISD::UNDEF) continue;
EltIsUndef = false;
- NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
+ NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
zextOrTrunc(SrcBitSize).zext(DstBitSize);
}
@@ -4586,13 +4834,13 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
continue;
}
- APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))->
- getAPIntValue()).zextOrTrunc(SrcBitSize);
+ APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
+ getAPIntValue().zextOrTrunc(SrcBitSize);
for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
- APInt ThisVal = APInt(OpVal).trunc(DstBitSize);
+ APInt ThisVal = OpVal.trunc(DstBitSize);
Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
- if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal)
+ if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
// Simply turn this into a SCALAR_TO_VECTOR of the new type.
return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
Ops[0]);
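
The narrowing arm of this constant fold, replayed on plain integers (illustrative; piece order for big-endian targets is adjusted separately):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      // One i32 BUILD_VECTOR constant becomes NumOutputsPerInput == 4 i8
      // elements by repeated truncate-then-shift.
      uint32_t OpVal = 0x11223344;
      std::vector<uint8_t> Ops;
      for (unsigned j = 0; j != 4; ++j) {
        Ops.push_back((uint8_t)OpVal);  // APInt::trunc(DstBitSize)
        OpVal >>= 8;                    // expose the next piece
      }
      assert(Ops[0] == 0x44 && Ops[3] == 0x11);
      return 0;
    }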
@@ -4984,10 +5232,9 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
N0.getValueType(),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
@@ -5011,7 +5258,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
// Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
// constant pool values.
- if (N0.getOpcode() == ISD::BIT_CONVERT &&
+ if (N0.getOpcode() == ISD::BITCAST &&
!VT.isVector() &&
N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger()) {
@@ -5021,7 +5268,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
AddToWorkList(Int.getNode());
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
VT, Int);
}
}
@@ -5047,7 +5294,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
// Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
// constant pool values.
- if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() &&
+ if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger() &&
!N0.getOperand(0).getValueType().isVector()) {
SDValue Int = N0.getOperand(0);
@@ -5056,7 +5303,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
AddToWorkList(Int.getNode());
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
N->getValueType(0), Int);
}
}
@@ -5084,14 +5331,17 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
N1.getOperand(0), N1.getOperand(1), N2);
}
- SDNode *Trunc = 0;
- if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
- // Look past truncate.
- Trunc = N1.getNode();
- N1 = N1.getOperand(0);
- }
+ if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
+ ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
+ (N1.getOperand(0).hasOneUse() &&
+ N1.getOperand(0).getOpcode() == ISD::SRL))) {
+ SDNode *Trunc = 0;
+ if (N1.getOpcode() == ISD::TRUNCATE) {
+ // Look past the truncate.
+ Trunc = N1.getNode();
+ N1 = N1.getOperand(0);
+ }
- if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
// Match this pattern so that we can generate simpler code:
//
// %a = ...
@@ -5100,7 +5350,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
// brcond i32 %c ...
//
// into
- //
+ //
// %a = ...
// %b = and i32 %a, 2
// %c = setcc eq %b, 0
@@ -5146,8 +5396,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
}
}
}
+
+ if (Trunc)
+ // Restore N1 if the above transformation doesn't match.
+ N1 = N->getOperand(1);
}
-
+
// Transform br(xor(x, y)) -> br(x != y)
// Transform br(xor(xor(x,y), 1)) -> br (x == y)
if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
@@ -5181,9 +5435,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
Equal = true;
}
- SDValue NodeToReplace = Trunc ? SDValue(Trunc, 0) : N1;
-
- EVT SetCCVT = NodeToReplace.getValueType();
+ EVT SetCCVT = N1.getValueType();
if (LegalTypes)
SetCCVT = TLI.getSetCCResultType(SetCCVT);
SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
@@ -5192,9 +5444,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
Equal ? ISD::SETEQ : ISD::SETNE);
// Replace the uses of XOR with SETCC
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(NodeToReplace, SetCC, &DeadNodes);
- removeFromWorkList(NodeToReplace.getNode());
- DAG.DeleteNode(NodeToReplace.getNode());
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+ removeFromWorkList(N1.getNode());
+ DAG.DeleteNode(N1.getNode());
return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
MVT::Other, Chain, SetCC, N2);
}
@@ -5568,10 +5820,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > LD->getAlignment())
- return DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0),
- N->getDebugLoc(),
- Chain, Ptr, LD->getSrcValue(),
- LD->getSrcValueOffset(), LD->getMemoryVT(),
+ return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+ LD->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(),
+ LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(), Align);
}
}
@@ -5587,15 +5839,13 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// Replace the chain to void dependency.
if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
- BetterChain, Ptr,
- LD->getSrcValue(), LD->getSrcValueOffset(),
+ BetterChain, Ptr, LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
LD->getAlignment());
} else {
- ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0),
- LD->getDebugLoc(),
- BetterChain, Ptr, LD->getSrcValue(),
- LD->getSrcValueOffset(),
+ ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
+ LD->getValueType(0),
+ BetterChain, Ptr, LD->getPointerInfo(),
LD->getMemoryVT(),
LD->isVolatile(),
LD->isNonTemporal(),
@@ -5605,10 +5855,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// Create token factor to keep old chain connected.
SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
MVT::Other, Chain, ReplLoad.getValue(1));
-
+
// Make sure the new and old chains are cleaned up.
AddToWorkList(Token.getNode());
-
+
// Replace uses with load result and token factor. Don't add users
// to work list.
return CombineTo(N, ReplLoad.getValue(0), Token, false);
@@ -5628,17 +5878,17 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
std::pair<unsigned, unsigned> Result(0, 0);
-
+
// Check for the structure we're looking for.
if (V->getOpcode() != ISD::AND ||
!isa<ConstantSDNode>(V->getOperand(1)) ||
!ISD::isNormalLoad(V->getOperand(0).getNode()))
return Result;
-
+
// Check the chain and pointer.
LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
-
+
// The store should be chained directly to the load or be an operand of a
// tokenfactor.
if (LD == Chain.getNode())
@@ -5654,7 +5904,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
}
if (!isOk) return Result;
}
-
+
// This only handles simple types.
if (V.getValueType() != MVT::i16 &&
V.getValueType() != MVT::i32 &&
@@ -5670,7 +5920,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
if (NotMaskLZ == 64) return Result; // All zero mask.
-
+
// See if we have a continuous run of bits. If so, we have 0*1+0*
if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
return Result;
@@ -5678,19 +5928,19 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
// Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
if (V.getValueType() != MVT::i64 && NotMaskLZ)
NotMaskLZ -= 64-V.getValueSizeInBits();
-
+
unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
switch (MaskedBytes) {
- case 1:
- case 2:
+ case 1:
+ case 2:
case 4: break;
default: return Result; // All one mask, or 5-byte mask.
}
-
+
// Verify that the first bit starts at a multiple of mask so that the access
// is aligned the same as the access width.
if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
-
+
Result.first = MaskedBytes;
Result.second = NotMaskTZ/8;
return Result;
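
To make the mask arithmetic concrete, a worked example (values chosen purely for illustration): for an i32 AND with the constant 0xFFFF00FF, the sign-extended NotMask is 0x000000000000FF00, so NotMaskTZ = 8 and NotMaskLZ = 48. The run check 8 + 8 + 48 == 64 passes, NotMaskLZ adjusts to 48 - (64 - 32) = 16, MaskedBytes = (32 - 16 - 8) / 8 = 1, and the function returns (1, 1): a one-byte access at byte offset 1.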
@@ -5707,20 +5957,20 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
unsigned NumBytes = MaskInfo.first;
unsigned ByteShift = MaskInfo.second;
SelectionDAG &DAG = DC->getDAG();
-
+
// Check to see if IVal is all zeros in the part being masked in by the 'or'
// that uses this. If not, this is not a replacement.
APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
ByteShift*8, (ByteShift+NumBytes)*8);
if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
-
+
// Check that it is legal on the target to do this. It is legal if the new
// VT we're shrinking to (i8/i16/i32) is legal or we're still before type
// legalization.
MVT VT = MVT::getIntegerVT(NumBytes*8);
if (!DC->isTypeLegal(VT))
return 0;
-
+
// Okay, we can do this! Replace the 'St' store with a store of IVal that is
// shifted by ByteShift and truncated down to NumBytes.
if (ByteShift)
@@ -5735,20 +5985,20 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
StOffset = ByteShift;
else
StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
-
+
SDValue Ptr = St->getBasePtr();
if (StOffset) {
Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
NewAlign = MinAlign(NewAlign, StOffset);
}
-
+
// Truncate down to the new size.
IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);
-
+
++OpsNarrowed;
- return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
- St->getSrcValue(), St->getSrcValueOffset()+StOffset,
+ return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
+ St->getPointerInfo().getWithOffset(StOffset),
false, false, NewAlign).getNode();
}
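
As a rough source-level analogue of what ShrinkLoadReplaceStoreWithStore achieves (the helper name and the little-endian byte layout below are illustrative assumptions, not part of the patch):

    #include <cstdint>

    // "store ((load p) & 0xFFFF00FF) | (y & 0x0000FF00), p" can only change
    // byte 1 of *p, so the load/or/store sequence collapses to a single
    // one-byte store of the masked-in byte.
    void narrowed_store(uint32_t *p, uint32_t y) {
      reinterpret_cast<unsigned char *>(p)[1] =
          static_cast<unsigned char>(y >> 8);
    }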
@@ -5771,7 +6021,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
return SDValue();
unsigned Opc = Value.getOpcode();
-
+
// If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
// is a byte mask indicating a consecutive number of bytes, check to see if
// Y is known to provide just those bytes. If so, we try to replace the
@@ -5784,7 +6034,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
Value.getOperand(1), ST,this))
return SDValue(NewST, 0);
-
+
// Or is commutative, so try swapping X and Y.
MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
if (MaskedLoad.first)
@@ -5792,7 +6042,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
Value.getOperand(0), ST,this))
return SDValue(NewST, 0);
}
-
+
if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
Value.getOperand(1).getOpcode() != ISD::Constant)
return SDValue();
@@ -5801,7 +6051,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
Chain == SDValue(N0.getNode(), 1)) {
LoadSDNode *LD = cast<LoadSDNode>(N0);
- if (LD->getBasePtr() != Ptr)
+ if (LD->getBasePtr() != Ptr ||
+ LD->getPointerInfo().getAddrSpace() !=
+ ST->getPointerInfo().getAddrSpace())
return SDValue();
// Find the type to narrow it the load / op / store to.
@@ -5850,14 +6102,14 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
DAG.getConstant(PtrOff, Ptr.getValueType()));
SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
LD->getChain(), NewPtr,
- LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->getPointerInfo().getWithOffset(PtrOff),
LD->isVolatile(), LD->isNonTemporal(),
NewAlign);
SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
DAG.getConstant(NewImm, NewVT));
SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
NewVal, NewPtr,
- ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getPointerInfo().getWithOffset(PtrOff),
false, false, NewAlign);
AddToWorkList(NewPtr.getNode());
@@ -5874,6 +6126,63 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
return SDValue();
}
+/// TransformFPLoadStorePair - For a given floating point load / store pair,
+/// if the load value isn't used by any other operations, then consider
+/// transforming the pair to integer load / store operations if the target
+/// deems the transformation profitable.
+SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
+ Value.hasOneUse() &&
+ Chain == SDValue(Value.getNode(), 1)) {
+ LoadSDNode *LD = cast<LoadSDNode>(Value);
+ EVT VT = LD->getMemoryVT();
+ if (!VT.isFloatingPoint() ||
+ VT != ST->getMemoryVT() ||
+ LD->isNonTemporal() ||
+ ST->isNonTemporal() ||
+ LD->getPointerInfo().getAddrSpace() != 0 ||
+ ST->getPointerInfo().getAddrSpace() != 0)
+ return SDValue();
+
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
+ !TLI.isOperationLegal(ISD::STORE, IntVT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
+ return SDValue();
+
+ unsigned LDAlign = LD->getAlignment();
+ unsigned STAlign = ST->getAlignment();
+ const Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy);
+ if (LDAlign < ABIAlign || STAlign < ABIAlign)
+ return SDValue();
+
+ SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
+ LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ false, false, LDAlign);
+
+ SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
+ NewLD, ST->getBasePtr(),
+ ST->getPointerInfo(),
+ false, false, STAlign);
+
+ AddToWorkList(NewLD.getNode());
+ AddToWorkList(NewST.getNode());
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1),
+ &DeadNodes);
+ ++LdStFP2Int;
+ return NewST;
+ }
+
+ return SDValue();
+}
+
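A hedged source-level picture of the new TransformFPLoadStorePair combine (the helper name is hypothetical, and whether the combine actually fires depends on the target's isDesirableToTransformToIntegerOp hooks and the ABI-alignment checks above):

    #include <cstdint>
    #include <cstring>

    // A double that is only loaded and immediately stored can travel
    // through integer registers instead of the FP unit.
    void copy_f64_as_i64(const double *src, double *dst) {
      uint64_t bits;
      std::memcpy(&bits, src, sizeof bits);  // integer-width load of the bits
      std::memcpy(dst, &bits, sizeof bits);  // integer-width store
    }
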
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
@@ -5882,7 +6191,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// If this is a store of a bit convert, store the input value if the
// resultant store does not need a higher alignment than the original.
- if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
+ if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
ST->isUnindexed()) {
unsigned OrigAlign = ST->getAlignment();
EVT SVT = Value.getOperand(0).getValueType();
@@ -5892,8 +6201,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
((!LegalOperations && !ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->isVolatile(),
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
ST->isNonTemporal(), OrigAlign);
}
@@ -5917,8 +6225,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
bitcastToAPInt().getZExtValue(), MVT::i32);
return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->isVolatile(),
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
ST->isNonTemporal(), ST->getAlignment());
}
break;
@@ -5929,8 +6236,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
getZExtValue(), MVT::i64);
return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->isVolatile(),
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
ST->isNonTemporal(), ST->getAlignment());
} else if (!ST->isVolatile() &&
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
@@ -5942,23 +6248,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
if (TLI.isBigEndian()) std::swap(Lo, Hi);
- int SVOffset = ST->getSrcValueOffset();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(),
+ Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal,
ST->getAlignment());
Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
DAG.getConstant(4, Ptr.getValueType()));
- SVOffset += 4;
Alignment = MinAlign(Alignment, 4U);
SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
- Ptr, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal,
+ Ptr, ST->getPointerInfo().getWithOffset(4),
+ isVolatile, isNonTemporal,
Alignment);
return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
St0, St1);
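
Sketched at the source level, the expansion above amounts to the following (hypothetical helper; the conditional swap mirrors the TLI.isBigEndian() check):

    #include <cstdint>
    #include <cstring>
    #include <utility>

    void store_f64_as_two_i32(double v, uint32_t *p, bool bigEndian) {
      uint64_t bits;
      std::memcpy(&bits, &v, sizeof bits);
      uint32_t lo = static_cast<uint32_t>(bits);
      uint32_t hi = static_cast<uint32_t>(bits >> 32);
      if (bigEndian) std::swap(lo, hi);
      p[0] = lo;  // first i32 store, original pointer info
      p[1] = hi;  // second store at offset 4, alignment MinAlign(Alignment, 4)
    }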
@@ -5974,12 +6277,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > ST->getAlignment())
return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->getMemoryVT(),
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(), Align);
}
}
+  // Try transforming a pair of floating point load / store ops to integer
+  // load / store ops.
+ SDValue NewST = TransformFPLoadStorePair(N);
+ if (NewST.getNode())
+ return NewST;
+
if (CombinerAA) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
@@ -5991,12 +6299,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Replace the chain to avoid dependency.
if (ST->isTruncatingStore()) {
ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
- ST->getSrcValue(),ST->getSrcValueOffset(),
+ ST->getPointerInfo(),
ST->getMemoryVT(), ST->isVolatile(),
ST->isNonTemporal(), ST->getAlignment());
} else {
ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
- ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getPointerInfo(),
ST->isVolatile(), ST->isNonTemporal(),
ST->getAlignment());
}
@@ -6030,17 +6338,16 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
AddToWorkList(Value.getNode());
if (Shorter.getNode())
return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->getMemoryVT(),
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(),
ST->getAlignment());
// Otherwise, see if we can simplify the operation with
// SimplifyDemandedBits, which only works if the value has a single use.
if (SimplifyDemandedBits(Value,
- APInt::getLowBitsSet(
- Value.getValueType().getScalarType().getSizeInBits(),
- ST->getMemoryVT().getScalarType().getSizeInBits())))
+ APInt::getLowBitsSet(
+ Value.getValueType().getScalarType().getSizeInBits(),
+ ST->getMemoryVT().getScalarType().getSizeInBits())))
return SDValue(N, 0);
}
@@ -6064,8 +6371,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
ST->getMemoryVT())) {
return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->getMemoryVT(),
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(),
ST->getAlignment());
}
@@ -6082,6 +6388,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
if (InVal.getOpcode() == ISD::UNDEF)
return InVec;
+ EVT VT = InVec.getValueType();
+
+ // If we can't generate a legal BUILD_VECTOR, exit
+ if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return SDValue();
+
// If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
// vector with the inserted element.
if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
@@ -6091,13 +6403,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
if (Elt < Ops.size())
Ops[Elt] = InVal;
return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
- InVec.getValueType(), &Ops[0], Ops.size());
+ VT, &Ops[0], Ops.size());
}
- // If the invec is an UNDEF and if EltNo is a constant, create a new
+ // If the invec is an UNDEF and if EltNo is a constant, create a new
// BUILD_VECTOR with undef elements and the inserted element.
- if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF &&
+ if (InVec.getOpcode() == ISD::UNDEF &&
isa<ConstantSDNode>(EltNo)) {
- EVT VT = InVec.getValueType();
EVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT));
@@ -6106,7 +6417,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
if (Elt < Ops.size())
Ops[Elt] = InVal;
return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
- InVec.getValueType(), &Ops[0], Ops.size());
+ VT, &Ops[0], Ops.size());
}
return SDValue();
}
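
The net effect of both folds is that inserting at a constant index rewrites one operand of a BUILD_VECTOR instead of leaving an INSERT_VECTOR_ELT node behind; a minimal analogue, with an array standing in for the vector:

    #include <array>

    std::array<float, 4> insert_elt_1(std::array<float, 4> v, float s) {
      v[1] = s;  // Ops[Elt] = InVal, then the BUILD_VECTOR is rebuilt
      return v;
    }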
@@ -6138,14 +6449,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue EltNo = N->getOperand(1);
if (isa<ConstantSDNode>(EltNo)) {
- unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
bool NewLoad = false;
bool BCNumEltsChanged = false;
EVT VT = InVec.getValueType();
EVT ExtVT = VT.getVectorElementType();
EVT LVT = ExtVT;
- if (InVec.getOpcode() == ISD::BIT_CONVERT) {
+ if (InVec.getOpcode() == ISD::BITCAST) {
EVT BCVT = InVec.getOperand(0).getValueType();
if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
return SDValue();
@@ -6176,10 +6487,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// Select the input vector, guarding against out of range extract vector.
unsigned NumElems = VT.getVectorNumElements();
- int Idx = (Elt > NumElems) ? -1 : SVN->getMaskElt(Elt);
+ int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
- if (InVec.getOpcode() == ISD::BIT_CONVERT)
+ if (InVec.getOpcode() == ISD::BITCAST)
InVec = InVec.getOperand(0);
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
@@ -6190,12 +6501,17 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile())
return SDValue();
+ // If Idx was -1 above, Elt is going to be -1, so just return undef.
+ if (Elt == -1)
+ return DAG.getUNDEF(LN0->getBasePtr().getValueType());
+
unsigned Align = LN0->getAlignment();
if (NewLoad) {
// Check the resultant load doesn't need a higher alignment than the
// original load.
unsigned NewAlign =
- TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
+ TLI.getTargetData()
+ ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
return SDValue();
@@ -6204,8 +6520,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
}
SDValue NewPtr = LN0->getBasePtr();
+ unsigned PtrOff = 0;
+
if (Elt) {
- unsigned PtrOff = LVT.getSizeInBits() * Elt / 8;
+ PtrOff = LVT.getSizeInBits() * Elt / 8;
EVT PtrType = NewPtr.getValueType();
if (TLI.isBigEndian())
PtrOff = VT.getSizeInBits() / 8 - PtrOff;
@@ -6214,7 +6532,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
}
return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
- LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->getPointerInfo().getWithOffset(PtrOff),
LN0->isVolatile(), LN0->isNonTemporal(), Align);
}
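
A hedged analogue of this combine: rather than loading the whole vector and extracting one lane, load only the addressed element (little-endian layout shown; the big-endian mirror of PtrOff is handled in the code above):

    float extract_lane2(const float *vec4_in_memory) {
      return vec4_in_memory[2];  // one f32 load at byte offset 32 * 2 / 8 = 8
    }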
@@ -6280,7 +6598,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
if (ExtIndex > VT.getVectorNumElements())
return SDValue();
-
+
Mask.push_back(ExtIndex);
continue;
}
@@ -6328,15 +6646,16 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// FIXME: implement canonicalizations from DAG.getVectorShuffle()
- // If it is a splat, check if the argument vector is a build_vector with
- // all scalar elements the same.
- if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
+ // If it is a splat, check if the argument vector is another splat or a
+ // build_vector with all scalar elements the same.
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
SDNode *V = N0.getNode();
// If this is a bit convert that changes the element type of the vector but
// not the number of vector elements, look through it. Be careful not to
    // look through conversions that change things like v4f32 to v2f64.
- if (V->getOpcode() == ISD::BIT_CONVERT) {
+ if (V->getOpcode() == ISD::BITCAST) {
SDValue ConvInput = V->getOperand(0);
if (ConvInput.getValueType().isVector() &&
ConvInput.getValueType().getVectorNumElements() == NumElts)
@@ -6344,30 +6663,28 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
if (V->getOpcode() == ISD::BUILD_VECTOR) {
- unsigned NumElems = V->getNumOperands();
- unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex();
- if (NumElems > BaseIdx) {
- SDValue Base;
- bool AllSame = true;
- for (unsigned i = 0; i != NumElems; ++i) {
- if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
- Base = V->getOperand(i);
- break;
- }
+ assert(V->getNumOperands() == NumElts &&
+ "BUILD_VECTOR has wrong number of operands");
+ SDValue Base;
+ bool AllSame = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+ Base = V->getOperand(i);
+ break;
}
- // Splat of <u, u, u, u>, return <u, u, u, u>
- if (!Base.getNode())
- return N0;
- for (unsigned i = 0; i != NumElems; ++i) {
- if (V->getOperand(i) != Base) {
- AllSame = false;
- break;
- }
+ }
+ // Splat of <u, u, u, u>, return <u, u, u, u>
+ if (!Base.getNode())
+ return N0;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (V->getOperand(i) != Base) {
+ AllSame = false;
+ break;
}
- // Splat of <x, x, x, x>, return <x, x, x, x>
- if (AllSame)
- return N0;
}
+ // Splat of <x, x, x, x>, return <x, x, x, x>
+ if (AllSame)
+ return N0;
}
}
return SDValue();
@@ -6436,7 +6753,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (N->getOpcode() == ISD::AND) {
- if (RHS.getOpcode() == ISD::BIT_CONVERT)
+ if (RHS.getOpcode() == ISD::BITCAST)
RHS = RHS.getOperand(0);
if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
SmallVector<int, 8> Indices;
@@ -6464,9 +6781,9 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
DAG.getConstant(0, EltVT));
SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
RVT, &ZeroOps[0], ZeroOps.size());
- LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
+ LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
}
}
@@ -6480,10 +6797,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
// things. Simplifying them may result in a loss of legality.
if (LegalOperations) return SDValue();
- EVT VT = N->getValueType(0);
- assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
+ assert(N->getValueType(0).isVector() &&
+ "SimplifyVBinOp only works on vectors!");
- EVT EltType = VT.getVectorElementType();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Shuffle = XformToShuffleWithZero(N);
@@ -6516,14 +6832,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
break;
}
- // If the vector element type is not legal, the BUILD_VECTOR operands
- // are promoted and implicitly truncated. Make that explicit here.
- if (LHSOp.getValueType() != EltType)
- LHSOp = DAG.getNode(ISD::TRUNCATE, LHS.getDebugLoc(), EltType, LHSOp);
- if (RHSOp.getValueType() != EltType)
- RHSOp = DAG.getNode(ISD::TRUNCATE, RHS.getDebugLoc(), EltType, RHSOp);
-
- SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), EltType,
+ EVT VT = LHSOp.getValueType();
+ assert(RHSOp.getValueType() == VT &&
+ "SimplifyVBinOp with different BUILD_VECTOR element types");
+ SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,
LHSOp, RHSOp);
if (FoldOp.getOpcode() != ISD::UNDEF &&
FoldOp.getOpcode() != ISD::Constant &&
@@ -6533,11 +6845,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
AddToWorkList(FoldOp.getNode());
}
- if (Ops.size() == LHS.getNumOperands()) {
- EVT VT = LHS.getValueType();
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
- &Ops[0], Ops.size());
- }
+ if (Ops.size() == LHS.getNumOperands())
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ LHS.getValueType(), &Ops[0], Ops.size());
}
return SDValue();
@@ -6580,103 +6890,101 @@ SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
SDValue RHS) {
+ // Cannot simplify select with vector condition
+ if (TheSelect->getOperand(0).getValueType().isVector()) return false;
+
// If this is a select from two identical things, try to pull the operation
// through the select.
- if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){
- // If this is a load and the token chain is identical, replace the select
- // of two loads with a load through a select of the address to load from.
- // This triggers in things like "select bool X, 10.0, 123.0" after the FP
- // constants have been dropped into the constant pool.
- if (LHS.getOpcode() == ISD::LOAD &&
+ if (LHS.getOpcode() != RHS.getOpcode() ||
+ !LHS.hasOneUse() || !RHS.hasOneUse())
+ return false;
+
+ // If this is a load and the token chain is identical, replace the select
+ // of two loads with a load through a select of the address to load from.
+ // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+ // constants have been dropped into the constant pool.
+ if (LHS.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+ LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+ // Token chains must be identical.
+ if (LHS.getOperand(0) != RHS.getOperand(0) ||
// Do not let this transformation reduce the number of volatile loads.
- !cast<LoadSDNode>(LHS)->isVolatile() &&
- !cast<LoadSDNode>(RHS)->isVolatile() &&
- // Token chains must be identical.
- LHS.getOperand(0) == RHS.getOperand(0)) {
- LoadSDNode *LLD = cast<LoadSDNode>(LHS);
- LoadSDNode *RLD = cast<LoadSDNode>(RHS);
-
- // If this is an EXTLOAD, the VT's must match.
- if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
+ LLD->isVolatile() || RLD->isVolatile() ||
+ // If this is an EXTLOAD, the VT's must match.
+ LLD->getMemoryVT() != RLD->getMemoryVT() ||
+ // If this is an EXTLOAD, the kind of extension must match.
+ (LLD->getExtensionType() != RLD->getExtensionType() &&
+ // The only exception is if one of the extensions is anyext.
+ LLD->getExtensionType() != ISD::EXTLOAD &&
+ RLD->getExtensionType() != ISD::EXTLOAD) ||
// FIXME: this discards src value information. This is
// over-conservative. It would be beneficial to be able to remember
// both potential memory locations. Since we are discarding
// src value info, don't do the transformation if the memory
// locations are not in the default address space.
- unsigned LLDAddrSpace = 0, RLDAddrSpace = 0;
- if (const Value *LLDVal = LLD->getMemOperand()->getValue()) {
- if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType()))
- LLDAddrSpace = PT->getAddressSpace();
- }
- if (const Value *RLDVal = RLD->getMemOperand()->getValue()) {
- if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType()))
- RLDAddrSpace = PT->getAddressSpace();
- }
- SDValue Addr;
- if (LLDAddrSpace == 0 && RLDAddrSpace == 0) {
- if (TheSelect->getOpcode() == ISD::SELECT) {
- // Check that the condition doesn't reach either load. If so, folding
- // this will induce a cycle into the DAG.
- if ((!LLD->hasAnyUseOfValue(1) ||
- !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
- (!RLD->hasAnyUseOfValue(1) ||
- !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
- Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
- LLD->getBasePtr().getValueType(),
- TheSelect->getOperand(0), LLD->getBasePtr(),
- RLD->getBasePtr());
- }
- } else {
- // Check that the condition doesn't reach either load. If so, folding
- // this will induce a cycle into the DAG.
- if ((!LLD->hasAnyUseOfValue(1) ||
- (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
- !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
- (!RLD->hasAnyUseOfValue(1) ||
- (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
- !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
- Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
- LLD->getBasePtr().getValueType(),
- TheSelect->getOperand(0),
- TheSelect->getOperand(1),
- LLD->getBasePtr(), RLD->getBasePtr(),
- TheSelect->getOperand(4));
- }
- }
- }
-
- if (Addr.getNode()) {
- SDValue Load;
- if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
- Load = DAG.getLoad(TheSelect->getValueType(0),
- TheSelect->getDebugLoc(),
- LLD->getChain(),
- Addr, 0, 0,
- LLD->isVolatile(),
- LLD->isNonTemporal(),
- LLD->getAlignment());
- } else {
- Load = DAG.getExtLoad(LLD->getExtensionType(),
- TheSelect->getValueType(0),
- TheSelect->getDebugLoc(),
- LLD->getChain(), Addr, 0, 0,
- LLD->getMemoryVT(),
- LLD->isVolatile(),
- LLD->isNonTemporal(),
- LLD->getAlignment());
- }
+ LLD->getPointerInfo().getAddrSpace() != 0 ||
+ RLD->getPointerInfo().getAddrSpace() != 0)
+ return false;
- // Users of the select now use the result of the load.
- CombineTo(TheSelect, Load);
+ // Check that the select condition doesn't reach either load. If so,
+ // folding this will induce a cycle into the DAG. If not, this is safe to
+ // xform, so create a select of the addresses.
+ SDValue Addr;
+ if (TheSelect->getOpcode() == ISD::SELECT) {
+ SDNode *CondNode = TheSelect->getOperand(0).getNode();
+ if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
+ (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
+ return false;
+ Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0), LLD->getBasePtr(),
+ RLD->getBasePtr());
+ } else { // Otherwise SELECT_CC
+ SDNode *CondLHS = TheSelect->getOperand(0).getNode();
+ SDNode *CondRHS = TheSelect->getOperand(1).getNode();
+
+ if ((LLD->hasAnyUseOfValue(1) &&
+ (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
+        (RLD->hasAnyUseOfValue(1) &&
+         (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
+ return false;
- // Users of the old loads now use the new load's chain. We know the
- // old-load value is dead now.
- CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
- CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
- return true;
- }
- }
- }
+ Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0),
+ TheSelect->getOperand(1),
+ LLD->getBasePtr(), RLD->getBasePtr(),
+ TheSelect->getOperand(4));
+ }
+
+ SDValue Load;
+ if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ Load = DAG.getLoad(TheSelect->getValueType(0),
+ TheSelect->getDebugLoc(),
+ // FIXME: Discards pointer info.
+ LLD->getChain(), Addr, MachinePointerInfo(),
+ LLD->isVolatile(), LLD->isNonTemporal(),
+ LLD->getAlignment());
+ } else {
+ Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
+ RLD->getExtensionType() : LLD->getExtensionType(),
+ TheSelect->getDebugLoc(),
+ TheSelect->getValueType(0),
+ // FIXME: Discards pointer info.
+ LLD->getChain(), Addr, MachinePointerInfo(),
+ LLD->getMemoryVT(), LLD->isVolatile(),
+ LLD->isNonTemporal(), LLD->getAlignment());
+ }
+
+ // Users of the select now use the result of the load.
+ CombineTo(TheSelect, Load);
+
+ // Users of the old loads now use the new load's chain. We know the
+ // old-load value is dead now.
+ CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
+ CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
+ return true;
}
return false;
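
A sketch of the transformation, legal only because the predecessor checks above guarantee the condition does not depend on either load:

    int select_of_loads(bool c, const int *p, const int *q) {
      const int *addr = c ? p : q;  // the select moves to the addresses
      return *addr;                 // a single load replaces the two originals
    }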
@@ -6689,7 +6997,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
ISD::CondCode CC, bool NotExtCompare) {
// (x ? y : y) -> y.
if (N2 == N3) return N2;
-
+
EVT VT = N2.getValueType();
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
@@ -6725,7 +7033,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
return DAG.getNode(ISD::FABS, DL, VT, N3);
}
}
-
+
// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
// in it. This is a win when the constant is not otherwise available because
@@ -6748,7 +7056,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
};
const Type *FPTy = Elts[0]->getType();
const TargetData &TD = *TLI.getTargetData();
-
+
// Create a ConstantArray of the two constants.
Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
@@ -6760,7 +7068,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
SDValue Zero = DAG.getIntPtrConstant(0);
unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
SDValue One = DAG.getIntPtrConstant(EltSize);
-
+
SDValue Cond = DAG.getSetCC(DL,
TLI.getSetCCResultType(N0.getValueType()),
N0, N1, CC);
@@ -6769,11 +7077,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
CstOffset);
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0, false,
+ MachinePointerInfo::getConstantPool(), false,
false, Alignment);
}
- }
+ }
// Check to see if we can perform the "gzip trick", transforming
// (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
@@ -6818,6 +7126,35 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
}
}
+ // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
+  // where y has a single bit set.
+  // In plain terms, we can turn the SELECT_CC into an AND
+ // when the condition can be materialized as an all-ones register. Any
+ // single bit-test can be materialized as an all-ones register with
+ // shift-left and shift-right-arith.
+ if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
+ N0->getValueType(0) == VT &&
+ N1C && N1C->isNullValue() &&
+ N2C && N2C->isNullValue()) {
+ SDValue AndLHS = N0->getOperand(0);
+ ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
+ // Shift the tested bit over the sign bit.
+ APInt AndMask = ConstAndRHS->getAPIntValue();
+ SDValue ShlAmt =
+ DAG.getConstant(AndMask.countLeadingZeros(), getShiftAmountTy());
+ SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt);
+
+ // Now arithmetic right shift it all the way over, so the result is either
+ // all-ones, or zero.
+ SDValue ShrAmt =
+ DAG.getConstant(AndMask.getBitWidth()-1, getShiftAmountTy());
+ SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt);
+
+ return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+ }
+ }
+
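For instance, with VT = i32 and y = 4 (only bit 2 set), the fold turns (x & 4) == 0 ? 0 : a into two shifts and an AND, with no branch; a hedged C++ rendering (the unsigned cast sidesteps signed-shift UB that the DAG-level SHL does not have):

    #include <cstdint>

    int32_t bit_test_and(int32_t x, int32_t a) {
      // countLeadingZeros(4) == 29: move bit 2 into the sign position.
      int32_t shl = static_cast<int32_t>(static_cast<uint32_t>(x) << 29);
      int32_t sra = shl >> 31;  // all-ones if the tested bit was set, else 0
      return sra & a;           // 0 when (x & 4) == 0, otherwise a
    }
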
// fold select C, 16, 0 -> shl C, 4
if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) {
@@ -6971,7 +7308,8 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
}
/// FindBaseOffset - Return true if base is a frame index, which is known not
-// to alias with anything but itself. Provides base object and offset as results.
+// to alias with anything but itself. Provides base object and offset as
+// results.
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
const GlobalValue *&GV, void *&CV) {
// Assume it is a primitive operation.
@@ -6984,7 +7322,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
Offset += C->getZExtValue();
}
}
-
+
// Return the underlying GlobalValue, and update the Offset. Return false
// for GlobalAddressSDNode since the same GlobalAddress may be represented
// by multiple nodes with different offsets.
@@ -7012,9 +7350,11 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
const Value *SrcValue1, int SrcValueOffset1,
unsigned SrcValueAlign1,
+ const MDNode *TBAAInfo1,
SDValue Ptr2, int64_t Size2,
const Value *SrcValue2, int SrcValueOffset2,
- unsigned SrcValueAlign2) const {
+ unsigned SrcValueAlign2,
+ const MDNode *TBAAInfo2) const {
// If they are the same then they must be aliases.
if (Ptr1 == Ptr2) return true;
@@ -7030,8 +7370,19 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
- // If we know what the bases are, and they aren't identical, then we know they
- // cannot alias.
+ // It is possible for different frame indices to alias each other, mostly
+ // when tail call optimization reuses return address slots for arguments.
+ // To catch this case, look up the actual index of frame indices to compute
+ // the real alias relationship.
+ if (isFrameIndex1 && isFrameIndex2) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
+ Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
+ return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+ }
+
+ // Otherwise, if we know what the bases are, and they aren't identical, then
+ // we know they cannot alias.
if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
return false;
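
Both the base+offset test and the new frame-index path reduce to the same interval-overlap predicate; as a small self-contained sketch:

    #include <cstdint>

    bool mayOverlap(int64_t off1, int64_t size1,
                    int64_t off2, int64_t size2) {
      return !((off1 + size1) <= off2 || (off2 + size2) <= off1);
    }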
@@ -7044,20 +7395,21 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
(Size1 == Size2) && (SrcValueAlign1 > Size1)) {
int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
-
+
// There is no overlap between these relatively aligned accesses of similar
// size, return no alias.
if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
return false;
}
-
+
if (CombinerGlobalAA) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
AliasAnalysis::AliasResult AAResult =
- AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2);
+ AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
+ AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
if (AAResult == AliasAnalysis::NoAlias)
return false;
}
@@ -7070,15 +7422,17 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
/// node. Returns true if the operand was a load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
SDValue &Ptr, int64_t &Size,
- const Value *&SrcValue,
+ const Value *&SrcValue,
int &SrcValueOffset,
- unsigned &SrcValueAlign) const {
+ unsigned &SrcValueAlign,
+ const MDNode *&TBAAInfo) const {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
Size = LD->getMemoryVT().getSizeInBits() >> 3;
SrcValue = LD->getSrcValue();
SrcValueOffset = LD->getSrcValueOffset();
SrcValueAlign = LD->getOriginalAlignment();
+ TBAAInfo = LD->getTBAAInfo();
return true;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
@@ -7086,6 +7440,7 @@ bool DAGCombiner::FindAliasInfo(SDNode *N,
SrcValue = ST->getSrcValue();
SrcValueOffset = ST->getSrcValueOffset();
SrcValueAlign = ST->getOriginalAlignment();
+ TBAAInfo = ST->getTBAAInfo();
} else {
llvm_unreachable("FindAliasInfo expected a memory operand");
}
@@ -7106,26 +7461,27 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
const Value *SrcValue;
int SrcValueOffset;
unsigned SrcValueAlign;
- bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
- SrcValueAlign);
+ const MDNode *SrcTBAAInfo;
+ bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
+ SrcValueAlign, SrcTBAAInfo);
// Starting off.
Chains.push_back(OriginalChain);
unsigned Depth = 0;
-
+
// Look at each chain and determine if it is an alias. If so, add it to the
// aliases list. If not, then continue up the chain looking for the next
// candidate.
while (!Chains.empty()) {
SDValue Chain = Chains.back();
Chains.pop_back();
-
- // For TokenFactor nodes, look at each operand and only continue up the
- // chain until we find two aliases. If we've seen two aliases, assume we'll
+
+ // For TokenFactor nodes, look at each operand and only continue up the
+ // chain until we find two aliases. If we've seen two aliases, assume we'll
// find more and revert to original chain since the xform is unlikely to be
// profitable.
- //
- // FIXME: The depth check could be made to return the last non-aliasing
+ //
+ // FIXME: The depth check could be made to return the last non-aliasing
// chain we found before we hit a tokenfactor rather than the original
// chain.
if (Depth > 6 || Aliases.size() == 2) {
@@ -7151,15 +7507,18 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
const Value *OpSrcValue;
int OpSrcValueOffset;
unsigned OpSrcValueAlign;
+ const MDNode *OpSrcTBAAInfo;
bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
OpSrcValue, OpSrcValueOffset,
- OpSrcValueAlign);
+ OpSrcValueAlign,
+ OpSrcTBAAInfo);
// If chain is alias then stop here.
if (!(IsLoad && IsOpLoad) &&
isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
+ SrcTBAAInfo,
OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
- OpSrcValueAlign)) {
+ OpSrcValueAlign, OpSrcTBAAInfo)) {
Aliases.push_back(Chain);
} else {
// Look further up the chain.
@@ -7206,9 +7565,9 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
// If a single operand then chain to it. We don't need to revisit it.
return Aliases[0];
}
-
+
// Construct a custom tailored token factor.
- return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
&Aliases[0], Aliases.size());
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index a4eed71..490b857 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -55,6 +55,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
/// startNewBlock - Set the current block to which generated machine
@@ -197,12 +198,12 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
}
-
+
// If target-independent code couldn't handle the value, give target-specific
// code a try.
if (!Reg && isa<Constant>(V))
Reg = TargetMaterializeConstant(cast<Constant>(V));
-
+
// Don't cache constant materializations in the general ValueMap.
// To do so would require tracking what uses they dominate.
if (Reg != 0) {
@@ -234,7 +235,7 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
LocalValueMap[I] = Reg;
return Reg;
}
-
+
unsigned &AssignedReg = FuncInfo.ValueMap[I];
if (AssignedReg == 0)
// Use the new register.
@@ -414,7 +415,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {
// If this is a constant subscript, handle it quickly.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero()) continue;
- uint64_t Offs =
+ uint64_t Offs =
TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
if (N == 0)
@@ -423,7 +424,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {
NIsKill = true;
continue;
}
-
+
// N = N + Idx * ElementSize;
uint64_t ElementSize = TD.getTypeAllocSize(Ty);
std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
@@ -467,16 +468,28 @@ bool FastISel::SelectCall(const User *I) {
return true;
const Value *Address = DI->getAddress();
- if (!Address)
+ if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address))
return true;
- if (isa<UndefValue>(Address))
- return true;
- const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
- // Don't handle byval struct arguments or VLAs, for example.
- if (!AI)
- // Building the map above is target independent. Generating DBG_VALUE
- // inline is target dependent; do this now.
- (void)TargetSelectInstruction(cast<Instruction>(I));
+
+ unsigned Reg = 0;
+ unsigned Offset = 0;
+ if (const Argument *Arg = dyn_cast<Argument>(Address)) {
+ if (Arg->hasByValAttr()) {
+ // Byval arguments' frame index is recorded during argument lowering.
+ // Use this info directly.
+ Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
+ if (Offset)
+ Reg = TRI.getFrameRegister(*FuncInfo.MF);
+ }
+ }
+ if (!Reg)
+ Reg = getRegForValue(Address);
+
+ if (Reg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(Reg, RegState::Debug).addImm(Offset)
+ .addMetadata(DI->getVariable());
return true;
}
case Intrinsic::dbg_value: {
@@ -505,11 +518,8 @@ bool FastISel::SelectCall(const User *I) {
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
- // Insert an undef so we can see what we dropped.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
- .addReg(0U).addImm(DI->getOffset())
- .addMetadata(DI->getVariable());
- }
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ }
return true;
}
case Intrinsic::eh_exception: {
@@ -582,12 +592,12 @@ bool FastISel::SelectCall(const User *I) {
bool FastISel::SelectCast(const User *I, unsigned Opcode) {
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(I->getType());
-
+
if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
DstVT == MVT::Other || !DstVT.isSimple())
// Unhandled type. Halt "fast" selection and bail.
return false;
-
+
// Check if the destination type is legal. Or as a special case,
// it may be i1 if we're doing a truncate because that's
// easy and somewhat common.
@@ -629,7 +639,7 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
InputReg, InputRegIsKill);
if (!ResultReg)
return false;
-
+
UpdateValueMap(I, ResultReg);
return true;
}
@@ -644,23 +654,23 @@ bool FastISel::SelectBitCast(const User *I) {
return true;
}
- // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators.
+ // Bitcasts of other values become reg-reg copies or BITCAST operators.
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(I->getType());
-
+
if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
DstVT == MVT::Other || !DstVT.isSimple() ||
!TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT))
// Unhandled type. Halt "fast" selection and bail.
return false;
-
+
unsigned Op0 = getRegForValue(I->getOperand(0));
if (Op0 == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
-
+
// First, try to perform the bitcast by inserting a reg-reg copy.
unsigned ResultReg = 0;
if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
@@ -673,15 +683,15 @@ bool FastISel::SelectBitCast(const User *I) {
ResultReg).addReg(Op0);
}
}
-
- // If the reg-reg copy failed, select a BIT_CONVERT opcode.
+
+ // If the reg-reg copy failed, select a BITCAST opcode.
if (!ResultReg)
ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
- ISD::BIT_CONVERT, Op0, Op0IsKill);
-
+ ISD::BITCAST, Op0, Op0IsKill);
+
if (!ResultReg)
return false;
-
+
UpdateValueMap(I, ResultReg);
return true;
}
@@ -753,7 +763,7 @@ FastISel::SelectFNeg(const User *I) {
return false;
unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
- ISD::BIT_CONVERT, OpReg, OpRegIsKill);
+ ISD::BITCAST, OpReg, OpRegIsKill);
if (IntReg == 0)
return false;
@@ -765,7 +775,7 @@ FastISel::SelectFNeg(const User *I) {
return false;
ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(),
- ISD::BIT_CONVERT, IntResultReg, /*Kill=*/true);
+ ISD::BITCAST, IntResultReg, /*Kill=*/true);
if (ResultReg == 0)
return false;
@@ -845,10 +855,10 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
// Dynamic-sized alloca is not handled yet.
return false;
-
+
case Instruction::Call:
return SelectCall(I);
-
+
case Instruction::BitCast:
return SelectBitCast(I);
@@ -911,7 +921,7 @@ unsigned FastISel::FastEmit_r(MVT, MVT,
return 0;
}
-unsigned FastISel::FastEmit_rr(MVT, MVT,
+unsigned FastISel::FastEmit_rr(MVT, MVT,
unsigned,
unsigned /*Op0*/, bool /*Op0IsKill*/,
unsigned /*Op1*/, bool /*Op1IsKill*/) {
@@ -1139,7 +1149,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
-
+
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm);
else {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 5ef6404..98582ba 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -29,7 +29,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 61c2a90..e309def 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -31,11 +31,11 @@
using namespace llvm;
/// CountResults - The results of target nodes have register or immediate
-/// operands first, then an optional chain, and optional flag operands (which do
+/// operands first, then an optional chain, and optional glue operands (which do
/// not go into the resulting MachineInstr).
unsigned InstrEmitter::CountResults(SDNode *Node) {
unsigned N = Node->getNumValues();
- while (N && Node->getValueType(N - 1) == MVT::Flag)
+ while (N && Node->getValueType(N - 1) == MVT::Glue)
--N;
if (N && Node->getValueType(N - 1) == MVT::Other)
--N; // Skip over chain result.
@@ -43,12 +43,12 @@ unsigned InstrEmitter::CountResults(SDNode *Node) {
}
/// CountOperands - The inputs to target nodes have any actual inputs first,
-/// followed by an optional chain operand, then an optional flag operand.
+/// followed by an optional chain operand, then an optional glue operand.
/// Compute the number of actual operands that will go into the resulting
/// MachineInstr.
unsigned InstrEmitter::CountOperands(SDNode *Node) {
unsigned N = Node->getNumOperands();
- while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag)
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
--N;
if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
--N; // Ignore chain if it exists.
@@ -67,7 +67,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
if (IsClone)
VRBaseMap.erase(Op);
bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
return;
}
@@ -96,7 +96,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
if (Op.getNode() != Node || Op.getResNo() != ResNo)
continue;
EVT VT = Node->getValueType(Op.getResNo());
- if (VT == MVT::Other || VT == MVT::Flag)
+ if (VT == MVT::Other || VT == MVT::Glue)
continue;
Match = false;
if (User->isMachineOpcode()) {
@@ -150,7 +150,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
if (IsClone)
VRBaseMap.erase(Op);
bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
@@ -224,7 +224,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
if (IsClone)
VRBaseMap.erase(Op);
bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
}
@@ -264,8 +264,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
DenseMap<SDValue, unsigned> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned) {
assert(Op.getValueType() != MVT::Other &&
- Op.getValueType() != MVT::Flag &&
- "Chain and flag operands should occur at end of operand list!");
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
// Get/emit the operand.
unsigned VReg = getVR(Op, VRBaseMap);
assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
@@ -377,8 +377,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
BA->getTargetFlags()));
} else {
assert(Op.getValueType() != MVT::Other &&
- Op.getValueType() != MVT::Flag &&
- "Chain and flag operands should occur at end of operand list!");
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
IsDebug, IsClone, IsCloned);
}
@@ -428,31 +428,47 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
// Figure out the register class to create for the destreg.
unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
- const TargetRegisterClass *TRC = MRI->getRegClass(VReg);
- const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
- assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
-
- // Figure out the register class to create for the destreg.
- // Note that if we're going to directly use an existing register,
- // it must be precisely the required class, and not a subclass
- // thereof.
- if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
- // Create the reg
- assert(SRC && "Couldn't find source register class");
- VRBase = MRI->createVirtualRegister(SRC);
- }
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ unsigned SrcReg, DstReg, DefSubIdx;
+ if (DefMI &&
+ TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
+ SubIdx == DefSubIdx) {
+ // Optimize these:
+ // r1025 = s/zext r1024, 4
+ // r1026 = extract_subreg r1025, 4
+ // to a copy
+ // r1026 = copy r1024
+ const TargetRegisterClass *TRC = MRI->getRegClass(SrcReg);
+ VRBase = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
+ } else {
+ const TargetRegisterClass *TRC = MRI->getRegClass(VReg);
+ const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
+ assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
+
+ // Figure out the register class to create for the destreg.
+ // Note that if we're going to directly use an existing register,
+ // it must be precisely the required class, and not a subclass
+ // thereof.
+ if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
+ // Create the reg
+ assert(SRC && "Couldn't find source register class");
+ VRBase = MRI->createVirtualRegister(SRC);
+ }
- // Create the extract_subreg machine instruction.
- MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
- TII->get(TargetOpcode::COPY), VRBase);
+ // Create the extract_subreg machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase);
- // Add source, and subreg index
- AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
- IsClone, IsCloned);
- assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()) &&
- "Cannot yet extract from physregs");
- MI->getOperand(1).setSubReg(SubIdx);
- MBB->insert(InsertPos, MI);
+ // Add source, and subreg index
+ AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg())&&
+ "Cannot yet extract from physregs");
+ MI->getOperand(1).setSubReg(SubIdx);
+ MBB->insert(InsertPos, MI);
+ }
} else if (Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG) {
SDValue N0 = Node->getOperand(0);
@@ -496,7 +512,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
SDValue Op(Node, 0);
bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
@@ -518,7 +534,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
SDValue Op(Node, 0);
bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
@@ -543,9 +559,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
const TargetRegisterClass *SRC =
TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
- if (!SRC)
- llvm_unreachable("Invalid subregister index in REG_SEQUENCE");
- if (SRC != RC) {
+ if (SRC && SRC != RC) {
MRI->setRegClass(NewVReg, SRC);
RC = SRC;
}
@@ -557,7 +571,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
MBB->insert(InsertPos, MI);
SDValue Op(Node, 0);
bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
@@ -673,10 +687,10 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// The MachineInstr constructor adds implicit-def operands. Scan through
// these to determine which are dead.
if (MI->getNumOperands() != 0 &&
- Node->getValueType(Node->getNumValues()-1) == MVT::Flag) {
+ Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
// First, collect all used registers.
SmallVector<unsigned, 8> UsedRegs;
- for (SDNode *F = Node->getFlaggedUser(); F; F = F->getFlaggedUser())
+ for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser())
if (F->getOpcode() == ISD::CopyFromReg)
UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
else {
@@ -689,7 +703,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
unsigned Reg = R->getReg();
- if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
UsedRegs.push_back(Reg);
}
}
@@ -721,20 +735,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// hook knows where in the block to insert the replacement code.
MBB->insert(InsertPos, MI);
- if (II.usesCustomInsertionHook()) {
- // Insert this instruction into the basic block using a target
- // specific inserter which may returns a new basic block.
- bool AtEnd = InsertPos == MBB->end();
- MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
- if (NewMBB != MBB) {
- if (AtEnd)
- InsertPos = NewMBB->end();
- MBB = NewMBB;
- }
- return;
- }
-
- // Additional results must be an physical register def.
+ // Additional results must be physical register defs.
if (HasPhysRegOuts) {
for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
@@ -742,17 +743,17 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
// If there are no uses, mark the register as dead now, so that
// MachineLICM/Sink can see that it's dead. Don't do this if the
- // node has a Flag value, for the benefit of targets still using
- // Flag for values in physregs.
- else if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
+ // node has a Glue value, for the benefit of targets still using
+ // Glue for values in physregs.
+ else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
MI->addRegisterDead(Reg, TRI);
}
}
// If the instruction has implicit defs and the node doesn't, mark the
- // implicit def as dead. If the node has any flag outputs, we don't do this
- // because we don't know what implicit defs are being used by flagged nodes.
- if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
+ // implicit def as dead. If the node has any glue outputs, we don't do this
+ // because we don't know what implicit defs are being used by glued nodes.
+ if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
if (const unsigned *IDList = II.getImplicitDefs()) {
for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs();
i != e; ++i)
@@ -808,8 +809,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
case ISD::INLINEASM: {
unsigned NumOps = Node->getNumOperands();
- if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
- --NumOps; // Ignore the flag operand.
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
// Create the inline asm machine instruction.
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
@@ -820,11 +821,11 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
MI->addOperand(MachineOperand::CreateES(AsmStr));
- // Add the isAlignStack bit.
- int64_t isAlignStack =
- cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_IsAlignStack))->
+ // Add the HasSideEffect and isAlignStack bits.
+ int64_t ExtraInfo =
+ cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
getZExtValue();
- MI->addOperand(MachineOperand::CreateImm(isAlignStack));
+ MI->addOperand(MachineOperand::CreateImm(ExtraInfo));
// Add all of the operand registers to the instruction.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
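
The last hunk folds what used to be a lone isAlignStack immediate into a
packed ExtraInfo word, so several inline-asm attributes travel in a single
operand. A sketch of that packing; the bit positions are illustrative
assumptions, not necessarily InlineAsm's exact encoding:

    #include <cstdint>

    enum : uint64_t {
      kHasSideEffects = 1u << 0, // assumed bit assignment
      kIsAlignStack   = 1u << 1, // assumed bit assignment
    };

    uint64_t packExtraInfo(bool sideEffects, bool alignStack) {
      return (sideEffects ? kHasSideEffects : 0) |
             (alignStack  ? kIsAlignStack  : 0);
    }

    bool hasSideEffects(uint64_t extra) { return extra & kHasSideEffects; }
    bool isAlignStack(uint64_t extra)   { return extra & kIsAlignStack; }
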
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 2981cd3..49c862c 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -11,14 +11,15 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -65,11 +66,6 @@ class SelectionDAGLegalize {
/// against each other, including inserted libcalls.
SDValue LastCALLSEQ_END;
- /// IsLegalizingCall - This member is used *only* for purposes of providing
- /// helpful assertions that a libcall isn't created while another call is
- /// being legalized (which could lead to non-serialized call sequences).
- bool IsLegalizingCall;
-
enum LegalizeAction {
Legal, // The target natively supports this operation.
Promote, // This operation should be executed in a larger type.
@@ -91,6 +87,9 @@ class SelectionDAGLegalize {
// If someone requests legalization of the new node, return itself.
if (From != To)
LegalizedNodes.insert(std::make_pair(To, To));
+
+ // Transfer SDDbgValues.
+ DAG.TransferDbgValues(From, To);
}
public:
@@ -172,6 +171,7 @@ private:
SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+ SDValue ExpandInsertToVectorThroughStack(SDValue Op);
SDValue ExpandVectorBuildThroughStack(SDNode* Node);
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
@@ -224,7 +224,6 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
void SelectionDAGLegalize::LegalizeDAG() {
LastCALLSEQ_END = DAG.getEntryNode();
- IsLegalizingCall = false;
// The legalize process is inherently a bottom-up recursive process (users
// legalize their uses before themselves). Given infinite stack space, we
@@ -251,9 +250,16 @@ void SelectionDAGLegalize::LegalizeDAG() {
/// FindCallEndFromCallStart - Given a chained node that is part of a call
/// sequence, find the CALLSEQ_END node that terminates the call sequence.
-static SDNode *FindCallEndFromCallStart(SDNode *Node) {
- if (Node->getOpcode() == ISD::CALLSEQ_END)
- return Node;
+static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
+ // Nested CALLSEQ_START/END constructs aren't yet legal,
+ // but we can do the right thing and handle them correctly here.
+ if (Node->getOpcode() == ISD::CALLSEQ_START)
+ depth++;
+ else if (Node->getOpcode() == ISD::CALLSEQ_END) {
+ depth--;
+ if (depth == 0)
+ return Node;
+ }
if (Node->use_empty())
return 0; // No CallSeqEnd
@@ -283,7 +289,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node) {
SDNode *User = *UI;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
if (User->getOperand(i) == TheChain)
- if (SDNode *Result = FindCallEndFromCallStart(User))
+ if (SDNode *Result = FindCallEndFromCallStart(User, depth))
return Result;
}
return 0;
@@ -292,12 +298,26 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node) {
/// FindCallStartFromCallEnd - Given a chained node that is part of a call
/// sequence, find the CALLSEQ_START node that initiates the call sequence.
static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
+ int nested = 0;
assert(Node && "Didn't find callseq_start for a call??");
- if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
-
- assert(Node->getOperand(0).getValueType() == MVT::Other &&
- "Node doesn't have a token chain argument!");
- return FindCallStartFromCallEnd(Node->getOperand(0).getNode());
+ while (Node->getOpcode() != ISD::CALLSEQ_START || nested) {
+ Node = Node->getOperand(0).getNode();
+ assert(Node->getOperand(0).getValueType() == MVT::Other &&
+ "Node doesn't have a token chain argument!");
+ switch (Node->getOpcode()) {
+ default:
+ break;
+ case ISD::CALLSEQ_START:
+ if (!nested)
+ return Node;
+ nested--;
+ break;
+ case ISD::CALLSEQ_END:
+ nested++;
+ break;
+ }
+ }
+ return 0;
}
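
Both finders now treat CALLSEQ_START/END like balanced parentheses: a
counter tracks the nesting depth, and only a marker reached at depth zero
matches. The same idea over a flat token list (invented enum, scanning
backwards from an END to its START, as FindCallStartFromCallEnd does):

    #include <vector>

    enum Tok { START, END, OTHER };

    int findMatchingStart(const std::vector<Tok> &toks, int endIdx) {
      int nested = 0;
      for (int i = endIdx - 1; i >= 0; --i) {
        if (toks[i] == END)
          ++nested;                 // entering a nested call sequence
        else if (toks[i] == START) {
          if (nested == 0)
            return i;               // this START matches our END
          --nested;                 // closes a nested sequence
        }
      }
      return -1;                    // unmatched
    }
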
/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to
@@ -377,12 +397,12 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
if (Extend)
- return DAG.getExtLoad(ISD::EXTLOAD, OrigVT, dl,
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
DAG.getEntryNode(),
- CPIdx, PseudoSourceValue::getConstantPool(),
- 0, VT, false, false, Alignment);
+ CPIdx, MachinePointerInfo::getConstantPool(),
+ VT, false, false, Alignment);
return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0, false, false,
+ MachinePointerInfo::getConstantPool(), false, false,
Alignment);
}
@@ -395,7 +415,6 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
SDValue Val = ST->getValue();
EVT VT = Val.getValueType();
int Alignment = ST->getAlignment();
- int SVOffset = ST->getSrcValueOffset();
DebugLoc dl = ST->getDebugLoc();
if (ST->getMemoryVT().isFloatingPoint() ||
ST->getMemoryVT().isVector()) {
@@ -404,10 +423,9 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Expand to a bitconvert of the value to the integer type of the
// same size, then a (misaligned) int store.
// FIXME: Does not handle truncating floating point stores!
- SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val);
- return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(),
- SVOffset, ST->isVolatile(), ST->isNonTemporal(),
- Alignment);
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
+ return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(), Alignment);
} else {
// Do a (aligned) store to a stack slot, then copy from the stack slot
// to the final destination using (unaligned) integer loads and stores.
@@ -425,8 +443,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Perform the original store, only redirected to the stack slot.
SDValue Store = DAG.getTruncStore(Chain, dl,
- Val, StackPtr, NULL, 0, StoredVT,
- false, false, 0);
+ Val, StackPtr, MachinePointerInfo(),
+ StoredVT, false, false, 0);
SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
SmallVector<SDValue, 8> Stores;
unsigned Offset = 0;
@@ -434,11 +452,12 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Do all but one copies using the full register width.
for (unsigned i = 1; i < NumRegs; i++) {
// Load one integer register's worth from the stack slot.
- SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0,
+ SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
+ MachinePointerInfo(),
false, false, 0);
// Store it to the final location. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
- ST->getSrcValue(), SVOffset + Offset,
+ ST->getPointerInfo().getWithOffset(Offset),
ST->isVolatile(), ST->isNonTemporal(),
MinAlign(ST->getAlignment(), Offset)));
// Increment the pointers.
@@ -455,11 +474,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
8 * (StoredBytes - Offset));
// Load from the stack slot.
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Store, StackPtr,
- NULL, 0, MemVT, false, false, 0);
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ MachinePointerInfo(),
+ MemVT, false, false, 0);
Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
- ST->getSrcValue(), SVOffset + Offset,
+ ST->getPointerInfo()
+ .getWithOffset(Offset),
MemVT, ST->isVolatile(),
ST->isNonTemporal(),
MinAlign(ST->getAlignment(), Offset)));
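
When no misaligned integer type is legal, the expansion above bounces the
value through an aligned stack temporary and copies it back out one
register's worth at a time, ending with a single partial (truncating)
chunk. The same shape in plain C++, assuming the value fits a fixed-size
temporary standing in for the stack slot:

    #include <cstring>

    void unalignedStore(void *dst, const void *val, unsigned storedBytes,
                        unsigned regBytes) {
      alignas(16) unsigned char tmp[64];   // stack-slot stand-in; assumes
      std::memcpy(tmp, val, storedBytes);  //   storedBytes <= 64
      unsigned offset = 0;
      while (storedBytes - offset >= regBytes) {   // full-width copies
        std::memcpy((char *)dst + offset, tmp + offset, regBytes);
        offset += regBytes;
      }
      if (offset < storedBytes)                    // final partial chunk
        std::memcpy((char *)dst + offset, tmp + offset,
                    storedBytes - offset);
    }
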
@@ -484,13 +505,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Store the two parts
SDValue Store1, Store2;
Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
- ST->getSrcValue(), SVOffset, NewStoredVT,
+ ST->getPointerInfo(), NewStoredVT,
ST->isVolatile(), ST->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
Alignment = MinAlign(Alignment, IncrementSize);
Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
- ST->getSrcValue(), SVOffset + IncrementSize,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
Alignment);
@@ -501,7 +522,6 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
static
SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
const TargetLowering &TLI) {
- int SVOffset = LD->getSrcValueOffset();
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
EVT VT = LD->getValueType(0);
@@ -512,74 +532,75 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
if (TLI.isTypeLegal(intVT)) {
// Expand to a (misaligned) integer load of the same size,
// then bitconvert to floating point or vector.
- SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset, LD->isVolatile(),
+ SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
- SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad);
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
if (VT.isFloatingPoint() && LoadedVT != VT)
Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
SDValue Ops[] = { Result, Chain };
return DAG.getMergeValues(Ops, 2, dl);
- } else {
- // Copy the value to a (aligned) stack slot using (unaligned) integer
- // loads and stores, then do a (aligned) load from the stack slot.
- EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
- unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
- unsigned RegBytes = RegVT.getSizeInBits() / 8;
- unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
-
- // Make sure the stack slot is also aligned for the register type.
- SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
-
- SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
- SmallVector<SDValue, 8> Stores;
- SDValue StackPtr = StackBase;
- unsigned Offset = 0;
-
- // Do all but one copies using the full register width.
- for (unsigned i = 1; i < NumRegs; i++) {
- // Load one integer register's worth from the original location.
- SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset + Offset, LD->isVolatile(),
- LD->isNonTemporal(),
- MinAlign(LD->getAlignment(), Offset));
- // Follow the load with a store to the stack slot. Remember the store.
- Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
- NULL, 0, false, false, 0));
- // Increment the pointers.
- Offset += RegBytes;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- Increment);
- }
+ }
- // The last copy may be partial. Do an extending load.
- EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
- 8 * (LoadedBytes - Offset));
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Chain, Ptr,
- LD->getSrcValue(), SVOffset + Offset,
- MemVT, LD->isVolatile(),
- LD->isNonTemporal(),
- MinAlign(LD->getAlignment(), Offset));
+ // Copy the value to a (aligned) stack slot using (unaligned) integer
+ // loads and stores, then do a (aligned) load from the stack slot.
+ EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
+ unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ SDValue StackPtr = StackBase;
+ unsigned Offset = 0;
+
+ // Do all but one copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the original location.
+ SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ LD->isVolatile(), LD->isNonTemporal(),
+ MinAlign(LD->getAlignment(), Offset));
// Follow the load with a store to the stack slot. Remember the store.
- // On big-endian machines this requires a truncating store to ensure
- // that the bits end up in the right place.
- Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
- NULL, 0, MemVT, false, false, 0));
-
- // The order of the stores doesn't matter - say it with a TokenFactor.
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
- Stores.size());
-
- // Finally, perform the original load only redirected to the stack slot.
- Load = DAG.getExtLoad(LD->getExtensionType(), VT, dl, TF, StackBase,
- NULL, 0, LoadedVT, false, false, 0);
-
- // Callers expect a MERGE_VALUES node.
- SDValue Ops[] = { Load, TF };
- return DAG.getMergeValues(Ops, 2, dl);
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo(), false, false, 0));
+ // Increment the pointers.
+ Offset += RegBytes;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
}
+
+ // The last copy may be partial. Do an extending load.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (LoadedBytes - Offset));
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ MemVT, LD->isVolatile(),
+ LD->isNonTemporal(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ // On big-endian machines this requires a truncating store to ensure
+ // that the bits end up in the right place.
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo(), MemVT,
+ false, false, 0));
+
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+
+ // Finally, perform the original load only redirected to the stack slot.
+ Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+ MachinePointerInfo(), LoadedVT, false, false, 0);
+
+ // Callers expect a MERGE_VALUES node.
+ SDValue Ops[] = { Load, TF };
+ return DAG.getMergeValues(Ops, 2, dl);
}
assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
"Unaligned load of unsupported type.");
@@ -602,22 +623,24 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// Load the value in two parts
SDValue Lo, Hi;
if (TLI.isLittleEndian()) {
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset, NewLoadedVT, LD->isVolatile(),
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
- Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
} else {
- Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset, NewLoadedVT, LD->isVolatile(),
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
}
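
The two-part fallback splits one misaligned access into two naturally
aligned halves and reassembles them with a shift and an OR; endianness
decides which half holds the high bits. A little-endian sketch (assumes a
little-endian host so the memcpy halves land where the comments say):

    #include <cstdint>
    #include <cstring>

    uint32_t loadUnaligned32LE(const unsigned char *p) {
      uint16_t lo, hi;
      std::memcpy(&lo, p, 2);       // ZEXTLOAD of the low half
      std::memcpy(&hi, p + 2, 2);   // extending load of the high half
      return (uint32_t)lo | ((uint32_t)hi << 16);
    }
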
@@ -660,7 +683,7 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
// Store the vector.
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
- PseudoSourceValue::getFixedStack(SPFI), 0,
+ MachinePointerInfo::getFixedStack(SPFI),
false, false, 0);
// Truncate or zero extend offset to target pointer type.
@@ -671,13 +694,11 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT));
SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
// Store the scalar value.
- Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2,
- PseudoSourceValue::getFixedStack(SPFI), 0, EltVT,
+ Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT,
false, false, 0);
// Load the updated vector.
return DAG.getLoad(VT, dl, Ch, StackPtr,
- PseudoSourceValue::getFixedStack(SPFI), 0,
- false, false, 0);
+ MachinePointerInfo::getFixedStack(SPFI), false, false, 0);
}
@@ -719,7 +740,6 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
SDValue Tmp1 = ST->getChain();
SDValue Tmp2 = ST->getBasePtr();
SDValue Tmp3;
- int SVOffset = ST->getSrcValueOffset();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
@@ -730,29 +750,34 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
Tmp3 = DAG.getConstant(CFP->getValueAPF().
bitcastToAPInt().zextOrTrunc(32),
MVT::i32);
- return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal, Alignment);
- } else if (CFP->getValueType(0) == MVT::f64) {
+ return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ }
+
+ if (CFP->getValueType(0) == MVT::f64) {
// If this target supports 64-bit registers, do a single 64-bit store.
if (getTypeAction(MVT::i64) == Legal) {
Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
zextOrTrunc(64), MVT::i64);
- return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal, Alignment);
- } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
+ return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ }
+
+ if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
// Otherwise, if the target supports 32-bit registers, use 2 32-bit
// stores. If the target supports neither 32- nor 64-bits, this
// xform is certainly not worth it.
const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt();
- SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32);
+ SDValue Lo = DAG.getConstant(IntVal.trunc(32), MVT::i32);
SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
if (TLI.isBigEndian()) std::swap(Lo, Hi);
- Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal, Alignment);
+ Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
DAG.getIntPtrConstant(4));
- Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4,
+ Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2,
+ ST->getPointerInfo().getWithOffset(4),
isVolatile, isNonTemporal, MinAlign(Alignment, 4U));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
@@ -792,7 +817,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
bool isCustom = false;
// Figure out the correct action; the way to query this varies by opcode
- TargetLowering::LegalizeAction Action;
+ TargetLowering::LegalizeAction Action = TargetLowering::Legal;
bool SimpleFinishLegalizing = true;
switch (Node->getOpcode()) {
case ISD::INTRINSIC_W_CHAIN:
@@ -860,6 +885,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::FRAME_TO_ARGS_OFFSET:
case ISD::EH_SJLJ_SETJMP:
case ISD::EH_SJLJ_LONGJMP:
+ case ISD::EH_SJLJ_DISPATCHSETUP:
// These operations lie about being legal: when they claim to be legal,
// they should actually be expanded.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -996,6 +1022,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
}
break;
case ISD::CALLSEQ_START: {
+ static int depth = 0;
SDNode *CallEnd = FindCallEndFromCallStart(Node);
// Recursively Legalize all of the inputs of the call end that do not lead
@@ -1013,7 +1040,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Merge in the last call to ensure that this call starts after the last
// call ended.
- if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
+ if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken && depth == 0) {
Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Tmp1, LastCALLSEQ_END);
Tmp1 = LegalizeOp(Tmp1);
@@ -1036,14 +1063,18 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// sequence have been legalized, legalize the call itself. During this
// process, no libcalls can/will be inserted, guaranteeing that no calls
// can overlap.
- assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
+
+ SDValue Saved_LastCALLSEQ_END = LastCALLSEQ_END;
// Note that we are selecting this call!
LastCALLSEQ_END = SDValue(CallEnd, 0);
- IsLegalizingCall = true;
+ depth++;
// Legalize the call, starting from the CALLSEQ_END.
LegalizeOp(LastCALLSEQ_END);
- assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
+ depth--;
+ assert(depth >= 0 && "Un-matched CALLSEQ_START?");
+ if (depth > 0)
+ LastCALLSEQ_END = Saved_LastCALLSEQ_END;
return Result;
}
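
With the IsLegalizingCall flag gone, nesting is handled by saving the
outer LastCALLSEQ_END, bumping a depth counter around the inner
legalization, and restoring on the way out. The discipline in miniature,
with invented types rather than the real legalizer state:

    struct CallSeqLegalizer {
      int depth = 0;
      void *lastCallSeqEnd = nullptr;

      void legalizeCallSeq(void *innerEnd) {
        void *saved = lastCallSeqEnd;
        lastCallSeqEnd = innerEnd;   // note that we are selecting this call
        ++depth;
        // ... legalize the inner call sequence here ...
        --depth;
        if (depth > 0)               // still inside an outer sequence, so
          lastCallSeqEnd = saved;    // the outer CALLSEQ_END is live again
      }
    };
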
case ISD::CALLSEQ_END:
@@ -1062,7 +1093,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
// Do not try to legalize the target-specific arguments (#1+), except for
// an optional flag input.
- if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag){
+ if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){
if (Tmp1 != Node->getOperand(0)) {
SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
Ops[0] = Tmp1;
@@ -1082,10 +1113,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Result.getResNo());
}
}
- assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
// This finishes up call legalization.
- IsLegalizingCall = false;
-
// If the CALLSEQ_END node has a flag, remember that we legalized it.
AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
if (Node->getNumValues() == 2)
@@ -1136,11 +1164,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Change base type to a different vector type.
EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
- Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
- LD->getSrcValueOffset(),
+ Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
LD->getAlignment());
- Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1));
+ Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1));
Tmp4 = LegalizeOp(Tmp1.getValue(1));
break;
}
@@ -1150,227 +1177,224 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
AddLegalizedOperand(SDValue(Node, 0), Tmp3);
AddLegalizedOperand(SDValue(Node, 1), Tmp4);
return Op.getResNo() ? Tmp4 : Tmp3;
- } else {
- EVT SrcVT = LD->getMemoryVT();
- unsigned SrcWidth = SrcVT.getSizeInBits();
- int SVOffset = LD->getSrcValueOffset();
- unsigned Alignment = LD->getAlignment();
- bool isVolatile = LD->isVolatile();
- bool isNonTemporal = LD->isNonTemporal();
-
- if (SrcWidth != SrcVT.getStoreSizeInBits() &&
- // Some targets pretend to have an i1 loading operation, and actually
- // load an i8. This trick is correct for ZEXTLOAD because the top 7
- // bits are guaranteed to be zero; it helps the optimizers understand
- // that these bits are zero. It is also useful for EXTLOAD, since it
- // tells the optimizers that those bits are undefined. It would be
- // nice to have an effective generic way of getting these benefits...
- // Until such a way is found, don't insist on promoting i1 here.
- (SrcVT != MVT::i1 ||
- TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
- // Promote to a byte-sized load if not loading an integral number of
- // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
- unsigned NewWidth = SrcVT.getStoreSizeInBits();
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
- SDValue Ch;
-
- // The extra bits are guaranteed to be zero, since we stored them that
- // way. A zext load from NVT thus automatically gives zext from SrcVT.
-
- ISD::LoadExtType NewExtType =
- ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
-
- Result = DAG.getExtLoad(NewExtType, Node->getValueType(0), dl,
- Tmp1, Tmp2, LD->getSrcValue(), SVOffset,
- NVT, isVolatile, isNonTemporal, Alignment);
-
- Ch = Result.getValue(1); // The chain.
-
- if (ExtType == ISD::SEXTLOAD)
- // Having the top bits zero doesn't help when sign extending.
- Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
- Result.getValueType(),
- Result, DAG.getValueType(SrcVT));
- else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
- // All the top bits are guaranteed to be zero - inform the optimizers.
- Result = DAG.getNode(ISD::AssertZext, dl,
- Result.getValueType(), Result,
- DAG.getValueType(SrcVT));
-
- Tmp1 = LegalizeOp(Result);
- Tmp2 = LegalizeOp(Ch);
- } else if (SrcWidth & (SrcWidth - 1)) {
- // If not loading a power-of-2 number of bits, expand as two loads.
- assert(!SrcVT.isVector() && "Unsupported extload!");
- unsigned RoundWidth = 1 << Log2_32(SrcWidth);
- assert(RoundWidth < SrcWidth);
- unsigned ExtraWidth = SrcWidth - RoundWidth;
- assert(ExtraWidth < RoundWidth);
- assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
- "Load size not an integral number of bytes!");
- EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
- EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
- SDValue Lo, Hi, Ch;
- unsigned IncrementSize;
+ }
- if (TLI.isLittleEndian()) {
- // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
- // Load the bottom RoundWidth bits.
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, Node->getValueType(0), dl,
- Tmp1, Tmp2,
- LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
- isNonTemporal, Alignment);
-
- // Load the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2,
- LD->getSrcValue(), SVOffset + IncrementSize,
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
-
- // Build a factor node to remember that this load is independent of
- // the other one.
- Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // Move the top bits to the right place.
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+ EVT SrcVT = LD->getMemoryVT();
+ unsigned SrcWidth = SrcVT.getSizeInBits();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+ // Some targets pretend to have an i1 loading operation, and actually
+ // load an i8. This trick is correct for ZEXTLOAD because the top 7
+ // bits are guaranteed to be zero; it helps the optimizers understand
+ // that these bits are zero. It is also useful for EXTLOAD, since it
+ // tells the optimizers that those bits are undefined. It would be
+ // nice to have an effective generic way of getting these benefits...
+ // Until such a way is found, don't insist on promoting i1 here.
+ (SrcVT != MVT::i1 ||
+ TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ unsigned NewWidth = SrcVT.getStoreSizeInBits();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
+ SDValue Ch;
+
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from NVT thus automatically gives zext from SrcVT.
+
+ ISD::LoadExtType NewExtType =
+ ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+
+ Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+
+ Ch = Result.getValue(1); // The chain.
+
+ if (ExtType == ISD::SEXTLOAD)
+ // Having the top bits zero doesn't help when sign extending.
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+ // All the top bits are guaranteed to be zero - inform the optimizers.
+ Result = DAG.getNode(ISD::AssertZext, dl,
+ Result.getValueType(), Result,
+ DAG.getValueType(SrcVT));
+
+ Tmp1 = LegalizeOp(Result);
+ Tmp2 = LegalizeOp(Ch);
+ } else if (SrcWidth & (SrcWidth - 1)) {
+ // If not loading a power-of-2 number of bits, expand as two loads.
+ assert(!SrcVT.isVector() && "Unsupported extload!");
+ unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+ assert(RoundWidth < SrcWidth);
+ unsigned ExtraWidth = SrcWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Load size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi, Ch;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+ // Load the bottom RoundWidth bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
+ Tmp1, Tmp2,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+
+ // Join the hi and lo parts.
+ Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ } else {
+ // Big endian - avoid unaligned loads.
+ // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+ // Load the top RoundWidth bits.
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
+ dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+
+ // Join the hi and lo parts.
+ Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ }
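
Spelling out the i24 example from the comments: the bottom sixteen bits
come from a zero-extending halfword load and the top eight from a byte
load at offset two, joined by a shift and an OR. A little-endian host
sketch:

    #include <cstdint>
    #include <cstring>

    uint32_t loadI24LE(const unsigned char *p) {
      uint16_t round;                 // the power-of-2 "RoundWidth" part
      std::memcpy(&round, p, 2);      // ZEXTLOAD:i16
      uint8_t extra = p[2];           // EXTLOAD@+2:i8, the "ExtraWidth" bits
      return (uint32_t)round | ((uint32_t)extra << 16);
    }
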
- // Join the hi and lo parts.
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ Tmp1 = LegalizeOp(Result);
+ Tmp2 = LegalizeOp(Ch);
+ } else {
+ switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp2, LD->getOffset()),
+ Result.getResNo());
+ Tmp1 = Result.getValue(0);
+ Tmp2 = Result.getValue(1);
+
+ if (isCustom) {
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.getNode()) {
+ Tmp1 = LegalizeOp(Tmp3);
+ Tmp2 = LegalizeOp(Tmp3.getValue(1));
+ }
} else {
- // Big endian - avoid unaligned loads.
- // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
- // Load the top RoundWidth bits.
- Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2,
- LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
- isNonTemporal, Alignment);
-
- // Load the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
- Node->getValueType(0), dl, Tmp1, Tmp2,
- LD->getSrcValue(), SVOffset + IncrementSize,
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
-
- // Build a factor node to remember that this load is independent of
- // the other one.
- Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // Move the top bits to the right place.
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
-
- // Join the hi and lo parts.
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
- }
-
- Tmp1 = LegalizeOp(Result);
- Tmp2 = LegalizeOp(Ch);
- } else {
- switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
- default: assert(0 && "This action is not supported yet!");
- case TargetLowering::Custom:
- isCustom = true;
- // FALLTHROUGH
- case TargetLowering::Legal:
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp2, LD->getOffset()),
- Result.getResNo());
- Tmp1 = Result.getValue(0);
- Tmp2 = Result.getValue(1);
-
- if (isCustom) {
- Tmp3 = TLI.LowerOperation(Result, DAG);
- if (Tmp3.getNode()) {
- Tmp1 = LegalizeOp(Tmp3);
- Tmp2 = LegalizeOp(Tmp3.getValue(1));
- }
- } else {
- // If this is an unaligned load and the target doesn't support it,
- // expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- const Type *Ty =
- LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment =
- TLI.getTargetData()->getABITypeAlignment(Ty);
- if (LD->getAlignment() < ABIAlignment){
- Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
- DAG, TLI);
- Tmp1 = Result.getOperand(0);
- Tmp2 = Result.getOperand(1);
- Tmp1 = LegalizeOp(Tmp1);
- Tmp2 = LegalizeOp(Tmp2);
- }
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ const Type *Ty =
+ LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment){
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+ DAG, TLI);
+ Tmp1 = Result.getOperand(0);
+ Tmp2 = Result.getOperand(1);
+ Tmp1 = LegalizeOp(Tmp1);
+ Tmp2 = LegalizeOp(Tmp2);
}
}
- break;
- case TargetLowering::Expand:
- if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) {
- SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
- LD->getSrcValueOffset(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- unsigned ExtendOp;
- switch (ExtType) {
- case ISD::EXTLOAD:
- ExtendOp = (SrcVT.isFloatingPoint() ?
- ISD::FP_EXTEND : ISD::ANY_EXTEND);
- break;
- case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
- case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
- default: llvm_unreachable("Unexpected extend load type!");
- }
- Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
- Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Load.getValue(1));
+ }
+ break;
+ case TargetLowering::Expand:
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) {
+ SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2,
+ LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ unsigned ExtendOp;
+ switch (ExtType) {
+ case ISD::EXTLOAD:
+ ExtendOp = (SrcVT.isFloatingPoint() ?
+ ISD::FP_EXTEND : ISD::ANY_EXTEND);
break;
+ case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
+ case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
+ default: llvm_unreachable("Unexpected extend load type!");
}
- // FIXME: This does not work for vectors on most targets. Sign- and
- // zero-extend operations are currently folded into extending loads,
- // whether they are legal or not, and then we end up here without any
- // support for legalizing them.
- assert(ExtType != ISD::EXTLOAD &&
- "EXTLOAD should always be supported!");
- // Turn the unsupported load into an EXTLOAD followed by an explicit
- // zero/sign extend inreg.
- Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0), dl,
- Tmp1, Tmp2, LD->getSrcValue(),
- LD->getSrcValueOffset(), SrcVT,
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- SDValue ValRes;
- if (ExtType == ISD::SEXTLOAD)
- ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
- Result.getValueType(),
- Result, DAG.getValueType(SrcVT));
- else
- ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT);
- Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes.
+ Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Load.getValue(1));
break;
}
+ // FIXME: This does not work for vectors on most targets. Sign- and
+ // zero-extend operations are currently folded into extending loads,
+ // whether they are legal or not, and then we end up here without any
+ // support for legalizing them.
+ assert(ExtType != ISD::EXTLOAD &&
+ "EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an explicit
+ // zero/sign extend inreg.
+ Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ SDValue ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes.
+ break;
}
-
- // Since loads produce two values, make sure to remember that we legalized
- // both of them.
- AddLegalizedOperand(SDValue(Node, 0), Tmp1);
- AddLegalizedOperand(SDValue(Node, 1), Tmp2);
- return Op.getResNo() ? Tmp2 : Tmp1;
}
+
+ // Since loads produce two values, make sure to remember that we legalized
+ // both of them.
+ AddLegalizedOperand(SDValue(Node, 0), Tmp1);
+ AddLegalizedOperand(SDValue(Node, 1), Tmp2);
+ return Op.getResNo() ? Tmp2 : Tmp1;
}
case ISD::STORE: {
StoreSDNode *ST = cast<StoreSDNode>(Node);
Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain.
Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer.
- int SVOffset = ST->getSrcValueOffset();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
@@ -1408,10 +1432,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
break;
case TargetLowering::Promote:
assert(VT.isVector() && "Unknown legal promote case!");
- Tmp3 = DAG.getNode(ISD::BIT_CONVERT, dl,
+ Tmp3 = DAG.getNode(ISD::BITCAST, dl,
TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
- ST->getSrcValue(), SVOffset, isVolatile,
+ ST->getPointerInfo(), isVolatile,
isNonTemporal, Alignment);
break;
}
@@ -1430,9 +1454,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
StVT.getStoreSizeInBits());
Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
- Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, NVT, isVolatile, isNonTemporal,
- Alignment);
+ Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
assert(!StVT.isVector() && "Unsupported truncstore!");
@@ -1450,8 +1473,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
if (TLI.isLittleEndian()) {
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
// Store the bottom RoundWidth bits.
- Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, RoundVT,
+ Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ RoundVT,
isVolatile, isNonTemporal, Alignment);
// Store the remaining ExtraWidth bits.
@@ -1460,9 +1483,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
DAG.getIntPtrConstant(IncrementSize));
Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
- Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
- SVOffset + IncrementSize, ExtraVT, isVolatile,
- isNonTemporal,
+ Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
} else {
// Big endian - avoid unaligned stores.
@@ -1470,17 +1493,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Store the top RoundWidth bits.
Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
- Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
- SVOffset, RoundVT, isVolatile, isNonTemporal,
- Alignment);
+ Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(),
+ RoundVT, isVolatile, isNonTemporal, Alignment);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
DAG.getIntPtrConstant(IncrementSize));
- Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset + IncrementSize, ExtraVT, isVolatile,
- isNonTemporal,
+ Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
}
@@ -1514,9 +1536,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// TRUNCSTORE:i16 i32 -> STORE i16
assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
- Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal,
- Alignment);
+ Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
break;
}
}
@@ -1543,8 +1564,8 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
DebugLoc dl = Op.getDebugLoc();
// Store the value to a temporary stack slot, then LOAD the returned part.
SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0,
- false, false, 0);
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Add the offset to the index.
unsigned EltSize =
@@ -1560,12 +1581,56 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
if (Op.getValueType().isVector())
- return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0,
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
false, false, 0);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+ MachinePointerInfo(),
+ Vec.getValueType().getVectorElementType(),
+ false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Non-vector insert subvector!");
+
+ SDValue Vec = Op.getOperand(0);
+ SDValue Part = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Spill the vector to a temporary stack slot, store the new part into
+ // it, then reload the updated vector.
+
+ SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+
+ // First store the whole vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ false, false, 0);
+
+ // Then store the inserted part.
+
+ // Add the offset to the index.
+ unsigned EltSize =
+ Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+
+ Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, Idx.getValueType()));
+
+ if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
else
- return DAG.getExtLoad(ISD::EXTLOAD, Op.getValueType(), dl, Ch, StackPtr,
- NULL, 0, Vec.getValueType().getVectorElementType(),
- false, false, 0);
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ StackPtr);
+
+ // Store the subvector.
+ Ch = DAG.getStore(DAG.getEntryNode(), dl, Part, SubStackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Finally, load the updated vector.
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
+ false, false, 0);
}
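
The new ExpandInsertToVectorThroughStack mirrors the extract path: spill
the whole vector, overwrite the element or subvector at idx * eltSize, and
reload the result. The scalar-element version in plain C++, with a
fixed-size array standing in for the stack temporary:

    #include <cstring>

    void insertEltViaStack(float *vec, unsigned numElts, unsigned idx,
                           float val) {
      float slot[16];                                  // assumes numElts <= 16
      std::memcpy(slot, vec, numElts * sizeof(float)); // store whole vector
      std::memcpy(slot + idx, &val, sizeof(float));    // store the element
      std::memcpy(vec, slot, numElts * sizeof(float)); // load updated vector
    }
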
SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
@@ -1578,7 +1643,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
DebugLoc dl = Node->getDebugLoc();
SDValue FIPtr = DAG.CreateStackTemporary(VT);
int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(FI);
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
// Emit a store of each element to the stack slot.
SmallVector<SDValue, 8> Stores;
@@ -1597,11 +1662,13 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
// element type, only store the bits necessary.
if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
- Node->getOperand(i), Idx, SV, Offset,
+ Node->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
EltVT, false, false, 0));
} else
Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
- Node->getOperand(i), Idx, SV, Offset,
+ Node->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
false, false, 0));
}
@@ -1613,7 +1680,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
StoreChain = DAG.getEntryNode();
// Result is a load from the stack slot.
- return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0, false, false, 0);
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0);
}
SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
@@ -1628,7 +1695,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
if (isTypeLegal(IVT)) {
// Convert to an integer with the same sign bit.
- SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2);
+ SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2);
} else {
// Store the float to memory, then load the sign part out as an integer.
MVT LoadTy = TLI.getPointerTy();
@@ -1636,12 +1703,13 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
// Then store the float to it.
SDValue Ch =
- DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, NULL, 0,
+ DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(),
false, false, 0);
if (TLI.isBigEndian()) {
assert(FloatVT.isByteSized() && "Unsupported floating point type!");
// Load out a legal integer with the same sign bit as the float.
- SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, NULL, 0, false, false, 0);
+ SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(),
+ false, false, 0);
} else { // Little endian
SDValue LoadPtr = StackPtr;
// The float may be wider than the integer we are going to load. Advance
@@ -1651,7 +1719,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(),
LoadPtr, DAG.getIntPtrConstant(ByteOffset));
// Load a legal integer containing the sign bit.
- SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, NULL, 0, false, false, 0);
+ SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
+ false, false, 0);
// Move the sign bit to the top bit of the loaded integer.
unsigned BitShift = LoadTy.getSizeInBits() -
(FloatVT.getSizeInBits() - 8 * ByteOffset);
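
The fast path of this FCOPYSIGN expansion reads the sign through a
same-width integer bitcast instead of going through memory. A standalone
f64 equivalent, using memcpy as the bitcast:

    #include <cstdint>
    #include <cstring>

    double copysignViaBits(double mag, double sgn) {
      uint64_t m, s;
      std::memcpy(&m, &mag, 8);
      std::memcpy(&s, &sgn, 8);
      uint64_t r = (m & ~(1ULL << 63)) | (s & (1ULL << 63));
      double out;                  // same result as std::copysign(mag, sgn)
      std::memcpy(&out, &r, 8);
      return out;
    }
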
@@ -1694,7 +1763,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
- unsigned StackAlign = TM.getFrameInfo()->getStackAlignment();
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
if (Align > StackAlign)
SP = DAG.getNode(ISD::AND, dl, VT, SP,
DAG.getConstant(-(uint64_t)Align, VT));
@@ -1768,7 +1837,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
int SPFI = StackPtrFI->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
unsigned SlotSize = SlotVT.getSizeInBits();
@@ -1782,21 +1851,21 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
if (SrcSize > SlotSize)
Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
- SV, 0, SlotVT, false, false, SrcAlign);
+ PtrInfo, SlotVT, false, false, SrcAlign);
else {
assert(SrcSize == SlotSize && "Invalid store");
Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
- SV, 0, false, false, SrcAlign);
+ PtrInfo, false, false, SrcAlign);
}
// Result is a load from the stack slot.
if (SlotSize == DestSize)
- return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, false,
- DestAlign);
+ return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo,
+ false, false, DestAlign);
assert(SlotSize < DestSize && "Unknown extension!");
- return DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, Store, FIPtr, SV, 0, SlotVT,
- false, false, DestAlign);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
+ PtrInfo, SlotVT, false, false, DestAlign);
}
SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
@@ -1810,11 +1879,11 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
StackPtr,
- PseudoSourceValue::getFixedStack(SPFI), 0,
+ MachinePointerInfo::getFixedStack(SPFI),
Node->getValueType(0).getVectorElementType(),
false, false, 0);
return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
- PseudoSourceValue::getFixedStack(SPFI), 0,
+ MachinePointerInfo::getFixedStack(SPFI),
false, false, 0);
}
@@ -1888,7 +1957,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, Alignment);
}
@@ -1924,7 +1993,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
// and leave the Hi part unset.
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
bool isSigned) {
- assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
// The input chain to this libcall is the entry node of the function.
// Legalizing the call will automatically add the previous call to the
// dependence.
@@ -1945,12 +2013,20 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
// Splice the libcall in wherever FindInputOutputChains tells us to.
const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+
+ // isTailCall may be true since the callee does not reference the caller's
+ // stack frame. Check whether the call is in a valid tail call position.
+ bool isTailCall = isInTailCallPosition(DAG, Node, TLI);
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, TLI.getLibcallCallingConv(LC), false,
+ 0, TLI.getLibcallCallingConv(LC), isTailCall,
/*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
+ if (!CallInfo.second.getNode())
+ // It's a tailcall, return the chain (which is the DAG root).
+ return DAG.getRoot();
+
// Legalize the call sequence, starting with the chain. This will advance
// the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
// was added by LowerCallTo (guaranteeing proper serialization of calls).
@@ -1964,7 +2040,6 @@ std::pair<SDValue, SDValue>
SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node,
bool isSigned) {
- assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
SDValue InChain = Node->getOperand(0);
TargetLowering::ArgListTy Args;
@@ -1985,7 +2060,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, TLI.getLibcallCallingConv(LC), false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
/*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
@@ -2064,16 +2139,17 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
}
// store the lo of the constructed double - based on integer input
SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
- Op0Mapped, Lo, NULL, 0,
+ Op0Mapped, Lo, MachinePointerInfo(),
false, false, 0);
// initial hi portion of constructed double
SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
// store the hi of the constructed double - biased exponent
- SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0,
- false, false, 0);
+ SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi,
+ MachinePointerInfo(),
+ false, false, 0);
// load the constructed double
- SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0,
- false, false, 0);
+ SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot,
+ MachinePointerInfo(), false, false, 0);
// FP constant to bias correct the final result
SDValue Bias = DAG.getConstantFP(isSigned ?
BitsToDouble(0x4330000080000000ULL) :
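
Aside: the two stores plus f64 load above implement the classic exponent-bias trick. A minimal standalone C++ sketch of the unsigned i32 case (the function name and the memcpy in place of the stack slot are illustrative, not part of the patch):

    #include <cstdint>
    #include <cstring>

    // Glue 0x43300000 (the high word of 2^52) on top of the 32-bit input,
    // reinterpret the eight bytes as an f64, then subtract the 2^52 bias.
    // The signed path instead maps the input with x ^ 0x80000000 and
    // subtracts BitsToDouble(0x4330000080000000), i.e. 2^52 + 2^31.
    static double u32_to_f64(uint32_t x) {
      uint64_t bits = (UINT64_C(0x43300000) << 32) | x;
      double d;
      std::memcpy(&d, &bits, sizeof d);   // "load the constructed double"
      return d - 4503599627370496.0;      // 2^52
    }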
@@ -2116,17 +2192,40 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
DAG.getConstant(32, MVT::i64));
SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);
SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
- SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr);
- SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr);
+ SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr);
+ SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr);
SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt,
TwoP84PlusTwoP52);
return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
}
- // Implementation of unsigned i64 to f32. This implementation has the
- // advantage of performing rounding correctly.
+ // Implementation of unsigned i64 to f32.
// TODO: Generalize this for use with other types.
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
+ // For unsigned conversions, convert them to signed conversions using the
+ // algorithm from the x86_64 __floatundidf in compiler_rt.
+ if (!isSigned) {
+ SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
+
+ SDValue ShiftConst = DAG.getConstant(1, TLI.getShiftAmountTy());
+ SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
+ SDValue AndConst = DAG.getConstant(1, MVT::i64);
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr);
+
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or);
+ SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt);
+
+ // TODO: This really should be implemented using a branch rather than a
+ // select. We happen to get lucky and machinesink does the right
+ // thing most of the time. This would be a good candidate for a
+ // pseudo-op, or, even better, for whole-function isel.
+ SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT);
+ return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast);
+ }
+
+ // Otherwise, implement the fully general conversion.
EVT SHVT = TLI.getShiftAmountTy();
SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
@@ -2140,7 +2239,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0);
SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64),
- ISD::SETUGE);
+ ISD::SETUGE);
SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);
SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
@@ -2155,7 +2254,6 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
DAG.getIntPtrConstant(0));
-
}
SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
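
Aside: both conversion paths in this hunk, modeled as standalone C++ (the names and the memcpy-based bitcasts are illustrative):

    #include <cstdint>
    #include <cstring>

    // u64 -> f64, mirroring LoOr/HiOr/HiSub/FADD: bias each 32-bit half
    // with the exponents of 2^52 and 2^84, then cancel the bias exactly.
    static double u64_to_f64(uint64_t x) {
      uint64_t lo_bits = (x & 0xFFFFFFFFu) | UINT64_C(0x4330000000000000);
      uint64_t hi_bits = (x >> 32)         | UINT64_C(0x4530000000000000);
      uint64_t bias_bits = UINT64_C(0x4530000000100000);  // 2^84 + 2^52
      double lo, hi, bias;
      std::memcpy(&lo, &lo_bits, 8);
      std::memcpy(&hi, &hi_bits, 8);
      std::memcpy(&bias, &bias_bits, 8);
      return (hi - bias) + lo;   // hi - bias is exact; one final rounding
    }

    // u64 -> f32, the compiler_rt __floatundisf-style path: when the sign
    // bit is set, halve with the low bit OR'd back in so the final rounding
    // still sees it, convert signed, then double the result.
    static float u64_to_f32(uint64_t x) {
      if ((int64_t)x >= 0)
        return (float)(int64_t)x;                     // the "Fast" path
      float f = (float)(int64_t)((x >> 1) | (x & 1)); // "SignCvt"
      return f + f;                                   // the "Slow" path
    }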
@@ -2189,13 +2287,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue FudgeInReg;
if (DestVT == MVT::f32)
FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, Alignment);
else {
FudgeInReg =
- LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl,
+ LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
MVT::f32, false, false, Alignment));
}
@@ -2332,6 +2430,18 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
}
}
+/// SplatByte - Distribute ByteVal over NumBits bits.
+// FIXME: Move this helper to a common place.
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
+ APInt Val = APInt(NumBits, ByteVal);
+ unsigned Shift = 8;
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Val = (Val << Shift) | Val;
+ Shift <<= 1;
+ }
+ return Val;
+}
+
/// ExpandBitCount - Expand the specified bitcount instruction into operations.
///
SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
@@ -2339,26 +2449,45 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
switch (Opc) {
default: assert(0 && "Cannot expand this yet!");
case ISD::CTPOP: {
- static const uint64_t mask[6] = {
- 0x5555555555555555ULL, 0x3333333333333333ULL,
- 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
- 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
- };
EVT VT = Op.getValueType();
EVT ShVT = TLI.getShiftAmountTy();
- unsigned len = VT.getSizeInBits();
- for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
- //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8])
- unsigned EltSize = VT.isVector() ?
- VT.getVectorElementType().getSizeInBits() : len;
- SDValue Tmp2 = DAG.getConstant(APInt(EltSize, mask[i]), VT);
- SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
- Op = DAG.getNode(ISD::ADD, dl, VT,
- DAG.getNode(ISD::AND, dl, VT, Op, Tmp2),
- DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3),
- Tmp2));
- }
+ unsigned Len = VT.getSizeInBits();
+
+ assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 &&
+ "CTPOP not implemented for this type.");
+
+ // This is the "best" algorithm from
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+
+ SDValue Mask55 = DAG.getConstant(SplatByte(Len, 0x55), VT);
+ SDValue Mask33 = DAG.getConstant(SplatByte(Len, 0x33), VT);
+ SDValue Mask0F = DAG.getConstant(SplatByte(Len, 0x0F), VT);
+ SDValue Mask01 = DAG.getConstant(SplatByte(Len, 0x01), VT);
+
+ // v = v - ((v >> 1) & 0x55555555...)
+ Op = DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(1, ShVT)),
+ Mask55));
+ // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+ Op = DAG.getNode(ISD::ADD, dl, VT,
+ DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(2, ShVT)),
+ Mask33));
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ Op = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(4, ShVT))),
+ Mask0F);
+ // v = (v * 0x01010101...) >> (Len - 8)
+ Op = DAG.getNode(ISD::SRL, dl, VT,
+ DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+ DAG.getConstant(Len - 8, ShVT));
+
return Op;
}
case ISD::CTLZ: {
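
Aside: a standalone scalar model of the new CTPOP expansion, SplatByte included (helper names are illustrative):

    #include <cstdint>

    // SplatByte analogue: replicate a byte across a 64-bit word by
    // repeated doubling, exactly as the helper above does.
    static uint64_t splat_byte64(uint8_t byte) {
      uint64_t v = byte;
      unsigned shift = 8;
      for (unsigned i = 64; i > 8; i >>= 1) {
        v = (v << shift) | v;
        shift <<= 1;
      }
      return v;   // splat_byte64(0x55) == 0x5555555555555555
    }

    // The "best" bithacks population count that the expansion now emits.
    static unsigned popcount64(uint64_t v) {
      const uint64_t m55 = splat_byte64(0x55), m33 = splat_byte64(0x33);
      const uint64_t m0F = splat_byte64(0x0F), m01 = splat_byte64(0x01);
      v = v - ((v >> 1) & m55);             // 2-bit field sums
      v = (v & m33) + ((v >> 2) & m33);     // 4-bit field sums
      v = (v + (v >> 4)) & m0F;             // byte sums
      return (unsigned)((v * m01) >> 56);   // fold every byte into the top
    }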
@@ -2516,9 +2645,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::PREFETCH:
case ISD::VAEND:
case ISD::EH_SJLJ_LONGJMP:
+ case ISD::EH_SJLJ_DISPATCHSETUP:
+ // If the target didn't expand these, there's nothing to do, so just
+ // preserve the chain and be done.
Results.push_back(Node->getOperand(0));
break;
case ISD::EH_SJLJ_SETJMP:
+ // If the target didn't expand this, just return 'zero' and preserve the
+ // chain.
Results.push_back(DAG.getConstant(0, MVT::i32));
Results.push_back(Node->getOperand(0));
break;
@@ -2527,7 +2661,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
TargetLowering::ArgListTy Args;
std::pair<SDValue, SDValue> CallResult =
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false,
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__sync_synchronize",
TLI.getPointerTy()),
@@ -2538,7 +2673,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
// By default, atomic intrinsics are marked Legal and lowered. Targets
// which don't support them directly, however, may want libcalls, in which
// case they mark them Expand, and we get here.
- // FIXME: Unimplemented for now. Add libcalls.
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
@@ -2578,7 +2712,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
TargetLowering::ArgListTy Args;
std::pair<SDValue, SDValue> CallResult =
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false,
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("abort", TLI.getPointerTy()),
Args, DAG, dl);
@@ -2586,7 +2721,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
break;
}
case ISD::FP_ROUND:
- case ISD::BIT_CONVERT:
+ case ISD::BITCAST:
Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
Node->getValueType(0), dl);
Results.push_back(Tmp1);
@@ -2637,8 +2772,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
SDValue True, False;
EVT VT = Node->getOperand(0).getValueType();
EVT NVT = Node->getValueType(0);
- const uint64_t zero[] = {0, 0};
- APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
+ APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
APInt x = APInt::getSignBit(NVT.getSizeInBits());
(void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
Tmp1 = DAG.getConstantFP(apf, VT);
@@ -2662,8 +2796,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Tmp2 = Node->getOperand(1);
unsigned Align = Node->getConstantOperandVal(3);
- SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0,
- false, false, 0);
+ SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2,
+ MachinePointerInfo(V), false, false, 0);
SDValue VAList = VAListLoad;
if (Align > TLI.getMinStackArgumentAlignment()) {
@@ -2674,7 +2808,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
TLI.getPointerTy()));
VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList,
- DAG.getConstant(-Align,
+ DAG.getConstant(-(int64_t)Align,
TLI.getPointerTy()));
}
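
Aside on the -(int64_t)Align fix: negating the 32-bit Align without the cast zero-extends into the pointer-width constant and drops the mask's high bits on 64-bit pointers. A tiny model (hypothetical helper):

    #include <cstdint>

    // Round ptr up to a power-of-two alignment; the sign-extending cast
    // is what keeps the high 32 bits of the mask set.
    static uint64_t align_up(uint64_t ptr, unsigned align) {
      return (ptr + align - 1) & (uint64_t)-(int64_t)align;
    }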
@@ -2684,10 +2818,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
TLI.getPointerTy()));
// Store the incremented VAList to the legalized pointer
- Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, V, 0,
- false, false, 0);
+ Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2,
+ MachinePointerInfo(V), false, false, 0);
// Load the actual argument out of the pointer VAList
- Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0,
+ Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
false, false, 0));
Results.push_back(Results[0].getValue(1));
break;
@@ -2698,16 +2832,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
- Node->getOperand(2), VS, 0, false, false, 0);
- Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0,
- false, false, 0);
+ Node->getOperand(2), MachinePointerInfo(VS),
+ false, false, 0);
+ Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
+ MachinePointerInfo(VD), false, false, 0);
Results.push_back(Tmp1);
break;
}
case ISD::EXTRACT_VECTOR_ELT:
if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
// This must be an access of the only element. Return it.
- Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0),
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0),
Node->getOperand(0));
else
Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
@@ -2716,6 +2851,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::EXTRACT_SUBVECTOR:
Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
break;
+ case ISD::INSERT_SUBVECTOR:
+ Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
+ break;
case ISD::CONCAT_VECTORS: {
Results.push_back(ExpandVectorBuildThroughStack(Node));
break;
@@ -3094,14 +3232,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
RHS);
TopHalf = BottomHalf.getValue(1);
- } else {
- // FIXME: We should be able to fall back to a libcall with an illegal
- // type in some cases.
- // Also, we can fall back to a division in some cases, but that's a big
- // performance hit in the general case.
- assert(TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
- VT.getSizeInBits() * 2)) &&
- "Don't know how to expand this operation yet!");
+ } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() * 2))) {
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
@@ -3110,6 +3242,30 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
DAG.getIntPtrConstant(0));
TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
DAG.getIntPtrConstant(1));
+ } else {
+ // If the wide type for the MUL is illegal, we can still fall back to a
+ // libcall, provided one big enough exists.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (WideVT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (WideVT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (WideVT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (WideVT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
+ LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+ RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+
+ SDValue Ret = ExpandLibCall(LC, Node, isSigned);
+ BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret);
+ TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret,
+ DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy()));
+ TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf);
}
if (isSigned) {
Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy());
@@ -3165,8 +3321,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
- SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, PTy, dl, Chain, Addr,
- PseudoSourceValue::getJumpTable(), 0, MemVT,
+ SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(), MemVT,
false, false, 0);
Addr = LD;
if (TM.getRelocationModel() == Reloc::PIC_) {
@@ -3329,8 +3485,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
case ISD::XOR: {
unsigned ExtOp, TruncOp;
if (OVT.isVector()) {
- ExtOp = ISD::BIT_CONVERT;
- TruncOp = ISD::BIT_CONVERT;
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
} else {
assert(OVT.isInteger() && "Cannot promote logic operation");
ExtOp = ISD::ANY_EXTEND;
@@ -3347,8 +3503,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
case ISD::SELECT: {
unsigned ExtOp, TruncOp;
if (Node->getValueType(0).isVector()) {
- ExtOp = ISD::BIT_CONVERT;
- TruncOp = ISD::BIT_CONVERT;
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
} else if (Node->getValueType(0).isInteger()) {
ExtOp = ISD::ANY_EXTEND;
TruncOp = ISD::TRUNCATE;
@@ -3375,12 +3531,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
// Cast the two input vectors.
- Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0));
- Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1));
// Convert the shuffle mask to the right # elements.
Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask);
- Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1);
Results.push_back(Tmp1);
break;
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 650ee5a..2775212 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -55,7 +55,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to soften the result of this operator!");
- case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break;
+ case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
case ISD::ConstantFP:
R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N));
@@ -102,7 +102,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
SetSoftenedFloat(SDValue(N, ResNo), R);
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
return BitConvertToInteger(N->getOperand(0));
}
@@ -133,8 +133,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
unsigned Size = NVT.getSizeInBits();
// Mask = ~(1 << (Size-1))
- SDValue Mask = DAG.getConstant(APInt::getAllOnesValue(Size).clear(Size-1),
- NVT);
+ APInt API = APInt::getAllOnesValue(Size);
+ API.clearBit(Size-1);
+ SDValue Mask = DAG.getConstant(API, NVT);
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask);
}
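
Aside: the Mask above is just a sign-bit clear on the softened (integer) representation; a scalar sketch with memcpy standing in for the soft-float value:

    #include <cstdint>
    #include <cstring>

    static float fabs_via_mask(float f) {
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof bits);
      bits &= ~(UINT32_C(1) << 31);   // all-ones with the top bit cleared
      std::memcpy(&f, &bits, sizeof f);
      return f;
    }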
@@ -455,7 +456,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
if (L->getExtensionType() == ISD::NON_EXTLOAD) {
NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
- L->getSrcValue(), L->getSrcValueOffset(), NVT,
+ L->getPointerInfo(), NVT,
L->isVolatile(), L->isNonTemporal(), L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -466,8 +467,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
// Do a non-extending load followed by FP_EXTEND.
NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD,
L->getMemoryVT(), dl, L->getChain(),
- L->getBasePtr(), L->getOffset(),
- L->getSrcValue(), L->getSrcValueOffset(),
+ L->getBasePtr(), L->getOffset(), L->getPointerInfo(),
L->getMemoryVT(), L->isVolatile(),
L->isNonTemporal(), L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
@@ -558,7 +558,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
#endif
llvm_unreachable("Do not know how to soften this operator's operand!");
- case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break;
@@ -670,8 +670,8 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
}
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) {
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
GetSoftenedFloat(N->getOperand(0)));
}
@@ -780,7 +780,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
Val = GetSoftenedFloat(Val);
return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
- ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getPointerInfo(),
ST->isVolatile(), ST->isNonTemporal(),
ST->getAlignment());
}
@@ -816,7 +816,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
- case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
@@ -1110,9 +1110,8 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
assert(NVT.isByteSized() && "Expanded type not byte sized!");
assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
- Hi = DAG.getExtLoad(LD->getExtensionType(), NVT, dl, Chain, Ptr,
- LD->getSrcValue(), LD->getSrcValueOffset(),
- LD->getMemoryVT(), LD->isVolatile(),
+ Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
+ LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
// Remember the chain.
@@ -1222,7 +1221,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
#endif
llvm_unreachable("Do not know how to expand this operator's operand!");
- case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
@@ -1421,7 +1420,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
GetExpandedOp(ST->getValue(), Lo, Hi);
return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr,
- ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getPointerInfo(),
ST->getMemoryVT(), ST->isVolatile(),
ST->isNonTemporal(), ST->getAlignment());
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index f8c5890..f0752df 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -49,7 +49,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
llvm_unreachable("Do not know how to promote this operator!");
case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
- case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
@@ -143,7 +143,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
N->getMemoryVT(),
N->getChain(), N->getBasePtr(),
- Op2, N->getSrcValue(), N->getAlignment());
+ Op2, N->getMemOperand());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -155,14 +155,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
SDValue Op3 = GetPromotedInteger(N->getOperand(3));
SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
N->getMemoryVT(), N->getChain(), N->getBasePtr(),
- Op2, Op3, N->getSrcValue(), N->getAlignment());
+ Op2, Op3, N->getMemOperand());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
-SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
SDValue InOp = N->getOperand(0);
EVT InVT = InOp.getValueType();
EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
@@ -179,8 +179,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
case PromoteInteger:
if (NOutVT.bitsEq(NInVT))
// The input promotes to the same size. Convert the promoted value.
- return DAG.getNode(ISD::BIT_CONVERT, dl,
- NOutVT, GetPromotedInteger(InOp));
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
break;
case SoftenFloat:
// Promote the integer operand by hand.
@@ -193,7 +192,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
BitConvertToInteger(GetScalarizedVector(InOp)));
case SplitVector: {
- // For example, i32 = BIT_CONVERT v2i16 on alpha. Convert the split
+ // For example, i32 = BITCAST v2i16 on alpha. Convert the split
// pieces of the input into integers and reassemble in the final type.
SDValue Lo, Hi;
GetSplitVector(N->getOperand(0), Lo, Hi);
@@ -207,12 +206,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
EVT::getIntegerVT(*DAG.getContext(),
NOutVT.getSizeInBits()),
JoinIntegers(Lo, Hi));
- return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp);
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
}
case WidenVector:
if (OutVT.bitsEq(NInVT))
// The input is widened to the same size. Convert to the widened value.
- return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp));
+ return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp));
}
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
@@ -293,7 +292,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
APInt TopBit(NVT.getSizeInBits(), 0);
- TopBit.set(OVT.getSizeInBits());
+ TopBit.setBit(OVT.getSizeInBits());
Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
return DAG.getNode(ISD::CTTZ, dl, NVT, Op);
}
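
Aside: a model of the TopBit trick for a cttz on i16 promoted to i32 (the GCC/Clang builtin stands in for the CTTZ node):

    #include <cstdint>

    // Set the bit just past the original width so cttz(0) reports 16,
    // the i16 width, rather than the wider type's answer.
    static unsigned cttz_i16_promoted(uint16_t x) {
      uint32_t wide = (uint32_t)x | (1u << 16);
      return (unsigned)__builtin_ctz(wide);
    }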
@@ -371,8 +370,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
ISD::LoadExtType ExtType =
ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
DebugLoc dl = N->getDebugLoc();
- SDValue Res = DAG.getExtLoad(ExtType, NVT, dl, N->getChain(), N->getBasePtr(),
- N->getSrcValue(), N->getSrcValueOffset(),
+ SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
+ N->getPointerInfo(),
N->getMemoryVT(), N->isVolatile(),
N->isNonTemporal(), N->getAlignment());
@@ -549,6 +548,48 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
return Res;
}
+SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
+ // Promote the overflow bit trivially.
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ EVT SmallVT = LHS.getValueType();
+
+ // To determine if the result overflowed in a larger type, we extend the input
+ // to the larger type, do the multiply, then check the high bits of the result
+ // to see if the overflow happened.
+ if (N->getOpcode() == ISD::SMULO) {
+ LHS = SExtPromotedInteger(LHS);
+ RHS = SExtPromotedInteger(RHS);
+ } else {
+ LHS = ZExtPromotedInteger(LHS);
+ RHS = ZExtPromotedInteger(RHS);
+ }
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+
+ // Overflow occurred iff the high part of the result does not zero/sign-extend
+ // the low part.
+ SDValue Overflow;
+ if (N->getOpcode() == ISD::UMULO) {
+ // Unsigned overflow occurred iff the high part is non-zero.
+ SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
+ DAG.getIntPtrConstant(SmallVT.getSizeInBits()));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
+ DAG.getConstant(0, Hi.getValueType()), ISD::SETNE);
+ } else {
+ // Signed overflow occurred iff the high part does not sign extend the low.
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(),
+ Mul, DAG.getValueType(SmallVT));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE);
+ }
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return Mul;
+}
+
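
Aside: the promotion strategy above, scalar-modeled for i32 via i64 (illustrative names): extend, multiply in the wider type, then test whether the high half merely re-extends the low half.

    #include <cstdint>

    static bool smulo_i32(int32_t a, int32_t b, int32_t &res) {
      int64_t wide = (int64_t)a * (int64_t)b;  // SExt operands, wide MUL
      res = (int32_t)wide;
      return wide != (int64_t)res;             // high half != sign-extension
    }

    static bool umulo_i32(uint32_t a, uint32_t b, uint32_t &res) {
      uint64_t wide = (uint64_t)a * (uint64_t)b;  // ZExt operands
      res = (uint32_t)wide;
      return (wide >> 32) != 0;                   // any high bit => overflow
    }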
SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
// Zero extend the input.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
@@ -602,11 +643,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
return Res;
}
-SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
- assert(ResNo == 1 && "Only boolean result promotion currently supported!");
- return PromoteIntRes_Overflow(N);
-}
-
//===----------------------------------------------------------------------===//
// Integer Operand Promotion
//===----------------------------------------------------------------------===//
@@ -631,7 +667,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
llvm_unreachable("Do not know how to promote this operator's operand!");
case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
- case ISD::BIT_CONVERT: Res = PromoteIntOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break;
case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break;
case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break;
case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break;
@@ -713,7 +749,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
// This should only occur in unusual situations like bitcasting to an
// x86_fp80, so just turn it into a store+load
return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
@@ -889,7 +925,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
- int SVOffset = N->getSrcValueOffset();
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
@@ -898,8 +933,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value.
// Truncate the value and store the result.
- return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(),
- SVOffset, N->getMemoryVT(),
+ return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(),
+ N->getMemoryVT(),
isVolatile, isNonTemporal, Alignment);
}
@@ -951,7 +986,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
- case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
@@ -978,6 +1013,23 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break;
case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP: {
+ std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N);
+ SplitInteger(Tmp.first, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ break;
+ }
+
case ISD::AND:
case ISD::OR:
case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
@@ -999,6 +1051,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
case ISD::UADDO:
case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
+ case ISD::UMULO:
+ case ISD::SMULO: ExpandIntRes_UMULSMULO(N, Lo, Hi); break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1006,11 +1060,98 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
}
+/// ExpandAtomic - Lower an atomic node to the matching __sync_* libcall.
+std::pair<SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
+ unsigned Opc = Node->getOpcode();
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic intrinsic Expand!");
+ break;
+ case ISD::ATOMIC_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+ }
+ break;
+ case ISD::ATOMIC_CMP_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_ADD:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_NAND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+ }
+ break;
+ }
+
+ return ExpandChainLibCall(LC, Node, false);
+}
+
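
Aside: the __sync_* entry points chosen above have the usual fetch-and-op semantics; a quick check via the GCC/Clang builtin, which typically lowers to the same __sync_fetch_and_add_8 symbol when no inline atomics are available:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int64_t counter = 40;
      // Atomically: old = counter; counter += 2; returns old.
      int64_t old = __sync_fetch_and_add(&counter, (int64_t)2);
      std::printf("old=%lld new=%lld\n", (long long)old, (long long)counter);
      return 0;
    }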
/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded,
/// and the shift amount is a constant 'Amt'. Expand the operation.
void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
SDValue &Lo, SDValue &Hi) {
- DebugLoc dl = N->getDebugLoc();
+ DebugLoc DL = N->getDebugLoc();
// Expand the incoming operand to be shifted, so that we have its parts
SDValue InL, InH;
GetExpandedInteger(N->getOperand(0), InL, InH);
@@ -1025,8 +1166,8 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
Lo = Hi = DAG.getConstant(0, NVT);
} else if (Amt > NVTBits) {
Lo = DAG.getConstant(0, NVT);
- Hi = DAG.getNode(ISD::SHL, dl,
- NVT, InL, DAG.getConstant(Amt-NVTBits,ShTy));
+ Hi = DAG.getNode(ISD::SHL, DL,
+ NVT, InL, DAG.getConstant(Amt-NVTBits, ShTy));
} else if (Amt == NVTBits) {
Lo = DAG.getConstant(0, NVT);
Hi = InL;
@@ -1034,17 +1175,17 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
TLI.isOperationLegalOrCustom(ISD::ADDC,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) {
// Emit this X << 1 as X+X.
- SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
SDValue LoOps[2] = { InL, InL };
- Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps, 2);
SDValue HiOps[3] = { InH, InH, Lo.getValue(1) };
- Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps, 3);
} else {
- Lo = DAG.getNode(ISD::SHL, dl, NVT, InL, DAG.getConstant(Amt, ShTy));
- Hi = DAG.getNode(ISD::OR, dl, NVT,
- DAG.getNode(ISD::SHL, dl, NVT, InH,
+ Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy));
+ Hi = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
DAG.getConstant(Amt, ShTy)),
- DAG.getNode(ISD::SRL, dl, NVT, InL,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(NVTBits-Amt, ShTy)));
}
return;
@@ -1055,43 +1196,43 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
Lo = DAG.getConstant(0, NVT);
Hi = DAG.getConstant(0, NVT);
} else if (Amt > NVTBits) {
- Lo = DAG.getNode(ISD::SRL, dl,
+ Lo = DAG.getNode(ISD::SRL, DL,
NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy));
Hi = DAG.getConstant(0, NVT);
} else if (Amt == NVTBits) {
Lo = InH;
Hi = DAG.getConstant(0, NVT);
} else {
- Lo = DAG.getNode(ISD::OR, dl, NVT,
- DAG.getNode(ISD::SRL, dl, NVT, InL,
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(Amt, ShTy)),
- DAG.getNode(ISD::SHL, dl, NVT, InH,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
DAG.getConstant(NVTBits-Amt, ShTy)));
- Hi = DAG.getNode(ISD::SRL, dl, NVT, InH, DAG.getConstant(Amt, ShTy));
+ Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
}
return;
}
assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
if (Amt > VTBits) {
- Hi = Lo = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits-1, ShTy));
} else if (Amt > NVTBits) {
- Lo = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(Amt-NVTBits, ShTy));
- Hi = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits-1, ShTy));
} else if (Amt == NVTBits) {
Lo = InH;
- Hi = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits-1, ShTy));
} else {
- Lo = DAG.getNode(ISD::OR, dl, NVT,
- DAG.getNode(ISD::SRL, dl, NVT, InL,
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(Amt, ShTy)),
- DAG.getNode(ISD::SHL, dl, NVT, InH,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
DAG.getConstant(NVTBits-Amt, ShTy)));
- Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, DAG.getConstant(Amt, ShTy));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
}
}
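
Aside: the constant-amount cases above, as a standalone model of a 128-bit SRL split into two 64-bit halves (the struct and names are illustrative; like the expanded node, it assumes amt > 0):

    #include <cstdint>

    struct U128 { uint64_t lo, hi; };

    static U128 srl128(U128 v, unsigned amt) {
      U128 r;
      if (amt >= 128)     { r.lo = 0; r.hi = 0; }
      else if (amt > 64)  { r.lo = v.hi >> (amt - 64); r.hi = 0; }
      else if (amt == 64) { r.lo = v.hi; r.hi = 0; }
      else {  // 1..63: the low half receives bits from both input halves
        r.lo = (v.lo >> amt) | (v.hi << (64 - amt));
        r.hi = v.hi >> amt;
      }
      return r;
    }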
@@ -1269,7 +1410,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
// Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
// them. TODO: Teach operation legalization how to expand unsupported
// ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate
- // a carry of type MVT::Flag, but there doesn't seem to be any way to
+ // a carry of type MVT::Glue, but there doesn't seem to be any way to
// generate a value of this type in the expanded code sequence.
bool hasCarry =
TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
@@ -1277,7 +1418,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (hasCarry) {
- SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
if (N->getOpcode() == ISD::ADD) {
Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
HiOps[2] = Lo.getValue(1);
@@ -1287,31 +1428,32 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
}
+ return;
+ }
+
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
+ SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0],
+ ISD::SETULT);
+ SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1],
+ ISD::SETULT);
+ SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2,
+ DAG.getConstant(1, NVT), Carry1);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
} else {
- if (N->getOpcode() == ISD::ADD) {
- Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
- SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0],
- ISD::SETULT);
- SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1,
- DAG.getConstant(1, NVT),
- DAG.getConstant(0, NVT));
- SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1],
- ISD::SETULT);
- SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2,
- DAG.getConstant(1, NVT), Carry1);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
- } else {
- Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2);
- Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2);
- SDValue Cmp =
- DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()),
- LoOps[0], LoOps[1], ISD::SETULT);
- SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp,
- DAG.getConstant(1, NVT),
- DAG.getConstant(0, NVT));
- Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
- }
+ Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2);
+ SDValue Cmp =
+ DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()),
+ LoOps[0], LoOps[1], ISD::SETULT);
+ SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
}
}
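
Aside: the straight-lined fallback derives the carry/borrow from unsigned compares; a 128-bit scalar sketch (the double compare mirrors Carry1/Carry2 above, though either compare alone would suffice):

    #include <cstdint>

    struct U128 { uint64_t lo, hi; };

    static U128 add128(U128 a, U128 b) {
      U128 r;
      r.lo = a.lo + b.lo;
      uint64_t carry = (r.lo < a.lo) ? 1 : (r.lo < b.lo) ? 1 : 0;
      r.hi = a.hi + b.hi + carry;
      return r;
    }

    static U128 sub128(U128 a, U128 b) {
      U128 r;
      r.lo = a.lo - b.lo;
      uint64_t borrow = (a.lo < b.lo) ? 1 : 0;  // the SETULT/Borrow select
      r.hi = a.hi - b.hi - borrow;
      return r;
    }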
@@ -1322,7 +1464,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N,
DebugLoc dl = N->getDebugLoc();
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
- SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
SDValue LoOps[2] = { LHSL, RHSL };
SDValue HiOps[3] = { LHSH, RHSH };
@@ -1348,7 +1490,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
DebugLoc dl = N->getDebugLoc();
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
- SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
SDValue HiOps[3] = { LHSH, RHSH };
@@ -1437,7 +1579,7 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned NBitWidth = NVT.getSizeInBits();
const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue();
- Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT);
+ Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT);
Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT);
}
@@ -1524,7 +1666,6 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
ISD::LoadExtType ExtType = N->getExtensionType();
- int SVOffset = N->getSrcValueOffset();
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
@@ -1535,7 +1676,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
if (N->getMemoryVT().bitsLE(NVT)) {
EVT MemVT = N->getMemoryVT();
- Lo = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
MemVT, isVolatile, isNonTemporal, Alignment);
// Remember the chain.
@@ -1557,7 +1698,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
}
} else if (TLI.isLittleEndian()) {
// Little-endian - low bits are at low addresses.
- Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
unsigned ExcessBits =
@@ -1568,8 +1709,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(),
- SVOffset+IncrementSize, NEVT,
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
@@ -1586,7 +1727,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
unsigned ExcessBits = (EBytes - IncrementSize)*8;
// Load both the high bits and maybe some of the low bits.
- Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
EVT::getIntegerVT(*DAG.getContext(),
MemVT.getSizeInBits() - ExcessBits),
isVolatile, isNonTemporal, Alignment);
@@ -1595,8 +1736,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
// Load the rest of the low bits.
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, NVT, dl, Ch, Ptr, N->getSrcValue(),
- SVOffset+IncrementSize,
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
@@ -1987,6 +2128,31 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
ReplaceValueWith(SDValue(N, 1), Ofl);
}
+void DAGTypeLegalizer::ExpandIntRes_UMULSMULO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() / 2);
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Ret = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS);
+ SplitInteger(Ret, Lo, Hi);
+
+ // Now calculate overflow.
+ SDValue Ofl;
+ if (N->getOpcode() == ISD::UMULO)
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi,
+ DAG.getConstant(0, VT), ISD::SETNE);
+ else {
+ SDValue Tmp = DAG.getConstant(VT.getSizeInBits() - 1, HalfVT);
+ Tmp = DAG.getNode(ISD::SRA, dl, HalfVT, Lo, Tmp);
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, Tmp, ISD::SETNE);
+ }
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
@@ -2078,7 +2244,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
#endif
llvm_unreachable("Do not know how to expand this operator's operand!");
- case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
@@ -2308,7 +2474,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
- int SVOffset = N->getSrcValueOffset();
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
@@ -2319,14 +2484,16 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (N->getMemoryVT().bitsLE(NVT)) {
GetExpandedInteger(N->getValue(), Lo, Hi);
- return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
N->getMemoryVT(), isVolatile, isNonTemporal,
Alignment);
- } else if (TLI.isLittleEndian()) {
+ }
+
+ if (TLI.isLittleEndian()) {
// Little-endian - low bits are at low addresses.
GetExpandedInteger(N->getValue(), Lo, Hi);
- Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
unsigned ExcessBits =
@@ -2337,50 +2504,49 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
- SVOffset+IncrementSize, NEVT,
- isVolatile, isNonTemporal,
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ NEVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
- } else {
- // Big-endian - high bits are at low addresses. Favor aligned stores at
- // the cost of some bit-fiddling.
- GetExpandedInteger(N->getValue(), Lo, Hi);
-
- EVT ExtVT = N->getMemoryVT();
- unsigned EBytes = ExtVT.getStoreSize();
- unsigned IncrementSize = NVT.getSizeInBits()/8;
- unsigned ExcessBits = (EBytes - IncrementSize)*8;
- EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
- ExtVT.getSizeInBits() - ExcessBits);
+ }
- if (ExcessBits < NVT.getSizeInBits()) {
- // Transfer high bits from the top of Lo to the bottom of Hi.
- Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
- DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
- TLI.getPointerTy()));
- Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
- DAG.getNode(ISD::SRL, dl, NVT, Lo,
- DAG.getConstant(ExcessBits,
- TLI.getPointerTy())));
- }
+ // Big-endian - high bits are at low addresses. Favor aligned stores at
+ // the cost of some bit-fiddling.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ EVT ExtVT = N->getMemoryVT();
+ unsigned EBytes = ExtVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+ EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
+ ExtVT.getSizeInBits() - ExcessBits);
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer high bits from the top of Lo to the bottom of Hi.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SRL, dl, NVT, Lo,
+ DAG.getConstant(ExcessBits,
+ TLI.getPointerTy())));
+ }
- // Store both the high bits and maybe some of the low bits.
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
- SVOffset, HiVT, isVolatile, isNonTemporal,
- Alignment);
+ // Store both the high bits and maybe some of the low bits.
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(),
+ HiVT, isVolatile, isNonTemporal, Alignment);
- // Increment the pointer to the other half.
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
- // Store the lowest ExcessBits bits in the second half.
- Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(),
- SVOffset+IncrementSize,
- EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
- isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
- }
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ // Store the lowest ExcessBits bits in the second half.
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
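
Aside: on a little-endian target the split store above is just two adjacent stores; a byte-level model assuming NVT is i64, so IncrementSize is 8:

    #include <cstdint>
    #include <cstring>

    static void store_i128_le(uint8_t *p, uint64_t lo, uint64_t hi) {
      std::memcpy(p, &lo, 8);       // low bits at low addresses
      std::memcpy(p + 8, &hi, 8);   // Ptr + IncrementSize
    }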
@@ -2460,8 +2626,10 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
// Load the value out, extending it from f32 to the destination float type.
// FIXME: Avoid the extend by constructing the right constant pool?
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, DstVT, dl, DAG.getEntryNode(),
- FudgePtr, NULL, 0, MVT::f32,
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
+ FudgePtr,
+ MachinePointerInfo::getConstantPool(),
+ MVT::f32,
false, false, Alignment);
return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
}
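
Aside: the fudge expansion is equivalent to a signed conversion plus a conditional 2^64 correction (the pool holds 0.0f and 2^64 as f32); a scalar model that, like the expansion itself, rounds twice rather than once:

    #include <cstdint>

    static double u64_to_f64_fudged(uint64_t x) {
      double d = (double)(int64_t)x;      // SignedConv
      if ((int64_t)x < 0)
        d += 18446744073709551616.0;      // Fudge = 2^64, else 0.0
      return d;
    }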
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 6e56c98..cedda7e 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -714,6 +714,11 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
if (M->getNodeId() == Processed)
RemapValue(NewVal);
DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+ // OldVal may be a target of the ReplacedValues map which was marked
+ // NewNode to force reanalysis because it was updated. Ensure that
+ // anything that ReplacedValues mapped to OldVal will now be mapped
+ // all the way to NewVal.
+ ReplacedValues[OldVal] = NewVal;
}
// The original node continues to exist in the DAG, marked NewNode.
}
@@ -858,7 +863,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
/// BitConvertToInteger - Convert to an integer of the same size.
SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
unsigned BitWidth = Op.getValueType().getSizeInBits();
- return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
}
@@ -869,7 +874,7 @@ SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
unsigned NumElts = Op.getValueType().getVectorNumElements();
- return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op);
}
@@ -880,10 +885,11 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
// the source and destination types.
SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
// Emit a store to the stack slot.
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0,
- false, false, 0);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Result is a load from the stack slot.
- return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0, false, false, 0);
+ return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, 0);
}
/// CustomLowerNode - Replace the node's results with custom code provided
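
Aside: CreateStackStoreLoad is the memory round-trip form of a bitcast; the scalar equivalent, with memcpy standing in for the stack temporary:

    #include <cstdint>
    #include <cstring>

    static uint64_t bits_of(double d) {
      uint64_t u;
      std::memcpy(&u, &d, sizeof u);  // store as one type, load as the other
      return u;
    }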
@@ -1049,6 +1055,39 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
return CallInfo.first;
}
+// ExpandChainLibCall - Expand a node into a libcall. Similar to
+// ExpandLibCall except that the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node,
+ bool isSigned) {
+ SDValue InChain = Node->getOperand(0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Node->getDebugLoc());
+
+ return CallInfo;
+}
+
/// PromoteTargetBoolean - Promote the given target boolean to a target boolean
/// of the given type. A target boolean is an integer value, not necessarily of
/// type i1, the bits of which conform to getBooleanContents.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d560292..3f81bbb 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -99,7 +99,7 @@ private:
return SoftenFloat;
return ExpandFloat;
}
-
+
if (VT.getVectorNumElements() == 1)
return ScalarizeVector;
return SplitVector;
@@ -192,6 +192,10 @@ private:
SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
const SDValue *Ops, unsigned NumOps, bool isSigned,
DebugLoc dl);
+ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node, bool isSigned);
+ std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);
void ReplaceValueWith(SDValue From, SDValue To);
void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
@@ -244,7 +248,7 @@ private:
SDValue PromoteIntRes_AssertZext(SDNode *N);
SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
SDValue PromoteIntRes_Atomic2(AtomicSDNode *N);
- SDValue PromoteIntRes_BIT_CONVERT(SDNode *N);
+ SDValue PromoteIntRes_BITCAST(SDNode *N);
SDValue PromoteIntRes_BSWAP(SDNode *N);
SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
SDValue PromoteIntRes_Constant(SDNode *N);
@@ -278,7 +282,7 @@ private:
// Integer Operand Promotion.
bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
- SDValue PromoteIntOp_BIT_CONVERT(SDNode *N);
+ SDValue PromoteIntOp_BITCAST(SDNode *N);
SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);
SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo);
@@ -344,6 +348,7 @@ private:
void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UMULSMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandShiftByConstant(SDNode *N, unsigned Amt,
SDValue &Lo, SDValue &Hi);
@@ -352,7 +357,7 @@ private:
// Integer Operand Expansion.
bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
- SDValue ExpandIntOp_BIT_CONVERT(SDNode *N);
+ SDValue ExpandIntOp_BITCAST(SDNode *N);
SDValue ExpandIntOp_BR_CC(SDNode *N);
SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N);
SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N);
@@ -387,7 +392,7 @@ private:
// Result Float to Integer Conversion.
void SoftenFloatResult(SDNode *N, unsigned OpNo);
- SDValue SoftenFloatRes_BIT_CONVERT(SDNode *N);
+ SDValue SoftenFloatRes_BITCAST(SDNode *N);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
@@ -426,7 +431,7 @@ private:
// Operand Float to Integer Conversion.
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
- SDValue SoftenFloatOp_BIT_CONVERT(SDNode *N);
+ SDValue SoftenFloatOp_BITCAST(SDNode *N);
SDValue SoftenFloatOp_BR_CC(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
@@ -515,7 +520,7 @@ private:
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
SDValue ScalarizeVecRes_InregOp(SDNode *N);
- SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N);
+ SDValue ScalarizeVecRes_BITCAST(SDNode *N);
SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue ScalarizeVecRes_FPOWI(SDNode *N);
@@ -532,7 +537,7 @@ private:
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
- SDValue ScalarizeVecOp_BIT_CONVERT(SDNode *N);
+ SDValue ScalarizeVecOp_BITCAST(SDNode *N);
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
@@ -557,7 +562,7 @@ private:
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -577,11 +582,12 @@ private:
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_UnaryOp(SDNode *N);
- SDValue SplitVecOp_BIT_CONVERT(SDNode *N);
+ SDValue SplitVecOp_BITCAST(SDNode *N);
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue SplitVecOp_FP_ROUND(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Support: LegalizeVectorTypes.cpp
@@ -603,7 +609,7 @@ private:
// Widen Vector Result Promotion.
void WidenVectorResult(SDNode *N, unsigned ResNo);
- SDValue WidenVecRes_BIT_CONVERT(SDNode* N);
+ SDValue WidenVecRes_BITCAST(SDNode* N);
SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
@@ -628,7 +634,7 @@ private:
// Widen Vector Operand.
bool WidenVectorOperand(SDNode *N, unsigned ResNo);
- SDValue WidenVecOp_BIT_CONVERT(SDNode *N);
+ SDValue WidenVecOp_BITCAST(SDNode *N);
SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
@@ -721,7 +727,7 @@ private:
}
// Generic Result Expansion.
- void ExpandRes_BIT_CONVERT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -729,7 +735,7 @@ private:
void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi);
// Generic Operand Expansion.
- SDValue ExpandOp_BIT_CONVERT (SDNode *N);
+ SDValue ExpandOp_BITCAST (SDNode *N);
SDValue ExpandOp_BUILD_VECTOR (SDNode *N);
SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N);
SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 9c2b1d9..a75ae87 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -32,8 +32,7 @@ using namespace llvm;
// little/big-endian machines, followed by the Hi/Lo part. This means that
// they cannot be used as is on vectors, for which Lo is always stored first.
-void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
SDValue InOp = N->getOperand(0);
@@ -50,31 +49,31 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
case SoftenFloat:
// Convert the integer operand instead.
SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
case ExpandInteger:
case ExpandFloat:
// Convert the expanded pieces of the input.
GetExpandedOp(InOp, Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
case SplitVector:
GetSplitVector(InOp, Lo, Hi);
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
case ScalarizeVector:
// Convert the element instead.
SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
case WidenVector: {
- assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT");
+ assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
InOp = GetWidenedVector(InOp);
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
InVT.getVectorNumElements()/2);
@@ -84,19 +83,19 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
}
}
if (InVT.isVector() && OutVT.isInteger()) {
- // Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand
+ // Handle cases like i64 = BITCAST v1i64 on x86, where the operand
// is legal but the result is not.
EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2);
if (isTypeLegal(NVT)) {
- SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp);
+ SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp);
Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
DAG.getIntPtrConstant(0));
Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
@@ -119,14 +118,14 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
getTypeForEVT(*DAG.getContext()));
SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
// Emit a store to the stack slot.
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0,
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo,
false, false, 0);
// Load the first half from the stack slot.
- Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0, false, false, 0);
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0);
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
@@ -134,7 +133,8 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
DAG.getIntPtrConstant(IncrementSize));
// Load the second half from the stack slot.
- Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false,
+ Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
+ PtrInfo.getWithOffset(IncrementSize), false,
false, MinAlign(Alignment, IncrementSize));
// Handle endianness of the load.
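
[Reviewer note] Concretely, for an i64 expanded to two i32 halves on a 32-bit target (a hedged example, values illustrative):

    // NOutVT = i32, so the second load reads the slot at offset 4.
    unsigned IncrementSize = NOutVT.getSizeInBits() / 8;   // == 4
    Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
                     PtrInfo.getWithOffset(IncrementSize), // slot + 4
                     false, false, MinAlign(Alignment, IncrementSize));
    if (TLI.isBigEndian())
      std::swap(Lo, Hi);  // memory holds the Hi half first on big-endian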
@@ -172,7 +172,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
EVT OldVT = N->getValueType(0);
EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
- SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
EVT::getVectorVT(*DAG.getContext(),
NewVT, 2*OldElts),
OldVec);
@@ -204,22 +204,21 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
- int SVOffset = LD->getSrcValueOffset();
unsigned Alignment = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
- Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset,
+ Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset+IncrementSize,
+ Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
@@ -262,14 +261,14 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Generic Operand Expansion.
//===--------------------------------------------------------------------===//
-SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
if (N->getValueType(0).isVector()) {
// An illegal expanding type is being converted to a legal vector type.
// Make a two element vector out of the expanded parts and convert that
// instead, but only if the new vector type is legal (otherwise there
// is no point, and it might create expansion loops). For example, on
- // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32.
+ // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32.
EVT OVT = N->getOperand(0).getValueType();
EVT NVT = EVT::getVectorVT(*DAG.getContext(),
TLI.getTypeToTransformTo(*DAG.getContext(), OVT),
@@ -283,7 +282,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) {
std::swap(Parts[0], Parts[1]);
SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2);
- return DAG.getNode(ISD::BIT_CONVERT, dl, N->getValueType(0), Vec);
+ return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
}
}
@@ -322,7 +321,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
&NewElts[0], NewElts.size());
// Convert the new vector to the old vector type.
- return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
}
SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
@@ -347,7 +346,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
// Bitconvert to a vector of twice the length with elements of the expanded
// type, insert the expanded vector elements, and then convert back.
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2);
- SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
NewVecVT, N->getOperand(0));
SDValue Lo, Hi;
@@ -363,7 +362,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
// Convert the new vector to the old vector type.
- return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
}
SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
@@ -390,7 +389,6 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
St->getValue().getValueType());
SDValue Chain = St->getChain();
SDValue Ptr = St->getBasePtr();
- int SVOffset = St->getSrcValueOffset();
unsigned Alignment = St->getAlignment();
bool isVolatile = St->isVolatile();
bool isNonTemporal = St->isNonTemporal();
@@ -404,14 +402,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset,
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
- Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(),
- SVOffset + IncrementSize,
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr,
+ St->getPointerInfo().getWithOffset(IncrementSize),
isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 621c087..167dbe0 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -241,14 +241,14 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
if (Op.getOperand(j).getValueType().isVector())
- Operands[j] = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Op.getOperand(j));
+ Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
else
Operands[j] = Op.getOperand(j);
}
Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size());
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 93bc2d0..182f8fc 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -46,7 +46,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to scalarize the result of this operator!");
- case ISD::BIT_CONVERT: R = ScalarizeVecRes_BIT_CONVERT(N); break;
+ case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: R = N->getOperand(0); break;
case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
@@ -122,9 +122,9 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
LHS.getValueType(), LHS, RHS);
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
EVT NewVT = N->getValueType(0).getVectorElementType();
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
NewVT, N->getOperand(0));
}
@@ -171,7 +171,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
N->getDebugLoc(),
N->getChain(), N->getBasePtr(),
DAG.getUNDEF(N->getBasePtr().getValueType()),
- N->getSrcValue(), N->getSrcValueOffset(),
+ N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
N->getOriginalAlignment());
@@ -296,8 +296,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to scalarize this operator's operand!");
- case ISD::BIT_CONVERT:
- Res = ScalarizeVecOp_BIT_CONVERT(N);
+ case ISD::BITCAST:
+ Res = ScalarizeVecOp_BITCAST(N);
break;
case ISD::CONCAT_VECTORS:
Res = ScalarizeVecOp_CONCAT_VECTORS(N);
@@ -326,11 +326,11 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
return false;
}
-/// ScalarizeVecOp_BIT_CONVERT - If the value to convert is a vector that needs
+/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs
/// to be scalarized, it must be <1 x ty>. Convert the element instead.
-SDValue DAGTypeLegalizer::ScalarizeVecOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
SDValue Elt = GetScalarizedVector(N->getOperand(0));
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
N->getValueType(0), Elt);
}
@@ -365,14 +365,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
if (N->isTruncatingStore())
return DAG.getTruncStore(N->getChain(), dl,
GetScalarizedVector(N->getOperand(1)),
- N->getBasePtr(),
- N->getSrcValue(), N->getSrcValueOffset(),
+ N->getBasePtr(), N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
N->getAlignment());
return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
- N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(),
+ N->getBasePtr(), N->getPointerInfo(),
N->isVolatile(), N->isNonTemporal(),
N->getOriginalAlignment());
}
@@ -407,7 +406,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
- case ISD::BIT_CONVERT: SplitVecRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break;
@@ -497,8 +496,8 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
}
-void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
// We know the result is a vector. The input may be either a vector or a
// scalar value.
EVT LoVT, HiVT;
@@ -526,8 +525,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
GetExpandedOp(InOp, Lo, Hi);
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
return;
}
break;
@@ -535,8 +534,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
// If the input is a vector that needs to be split, convert each split
// piece of the input now.
GetSplitVector(InOp, Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
return;
}
@@ -550,8 +549,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
}
void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
@@ -626,9 +625,9 @@ void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo,
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
LoVT.getVectorNumElements());
VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0));
VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
break;
}
}
@@ -646,16 +645,15 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Vec = N->getOperand(0);
SDValue Idx = N->getOperand(1);
- EVT IdxVT = Idx.getValueType();
DebugLoc dl = N->getDebugLoc();
EVT LoVT, HiVT;
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
- Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx,
- DAG.getConstant(LoVT.getVectorNumElements(), IdxVT));
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, Idx);
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
+ DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements()));
}
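
[Reviewer note] Since the index is now required to be constant, the Hi half's index folds at build time. A hedged worked example, assuming the source vector is wide enough (say v16i32): splitting the v8i32 result of (extract_subvector V, 2) into two v4i32 halves gives

    // LoVT = HiVT = v4i32, IdxVal = 2:
    Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);  // elts 2..5
    Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
                     DAG.getIntPtrConstant(2 + 4));                // elts 6..9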
void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
@@ -705,8 +703,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0,
- false, false, 0);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Store the new element. This may be larger than the vector element type,
// so use a truncating store.
@@ -714,11 +712,11 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment =
TLI.getTargetData()->getPrefTypeAlignment(VecType);
- Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT,
+ Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,
false, false, 0);
// Load the Lo part from the stack slot.
- Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0,
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
false, false, 0);
// Increment the pointer to the other part.
@@ -727,8 +725,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
DAG.getIntPtrConstant(IncrementSize));
// Load the Hi part from the stack slot.
- Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false,
- false, MinAlign(Alignment, IncrementSize));
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, MinAlign(Alignment, IncrementSize));
}
void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
@@ -751,8 +749,6 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
SDValue Ch = LD->getChain();
SDValue Ptr = LD->getBasePtr();
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
- const Value *SV = LD->getSrcValue();
- int SVOffset = LD->getSrcValueOffset();
EVT MemoryVT = LD->getMemoryVT();
unsigned Alignment = LD->getOriginalAlignment();
bool isVolatile = LD->isVolatile();
@@ -762,14 +758,15 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
- SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment);
+ LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
+ Alignment);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- SVOffset += IncrementSize;
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
- SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment);
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ HiMemVT, isVolatile, isNonTemporal, Alignment);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -980,10 +977,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
#endif
llvm_unreachable("Do not know how to split this operator's operand!");
- case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
+ case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
@@ -995,6 +993,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
+ case ISD::FP_EXTEND:
+ case ISD::FTRUNC:
case ISD::TRUNCATE:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
@@ -1036,8 +1036,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
-SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) {
- // For example, i64 = BIT_CONVERT v4i16 on alpha. Typically the vector will
+SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
+ // For example, i64 = BITCAST v4i16 on Alpha. Typically the vector will
// end up being split all the way down to individual components. Convert the
// split pieces into integers and reassemble.
SDValue Lo, Hi;
@@ -1048,13 +1048,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) {
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
JoinIntegers(Lo, Hi));
}
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
- // We know that the extracted result type is legal. For now, assume the index
- // is a constant.
+ // We know that the extracted result type is legal.
EVT SubVT = N->getValueType(0);
SDValue Idx = N->getOperand(1);
DebugLoc dl = N->getDebugLoc();
@@ -1099,15 +1098,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
EVT EltVT = VecVT.getVectorElementType();
DebugLoc dl = N->getDebugLoc();
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0,
- false, false, 0);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Load back the required element.
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
- return DAG.getExtLoad(ISD::EXTLOAD, N->getValueType(0), dl, Store, StackPtr,
- SV, 0, EltVT, false, false, 0);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+ MachinePointerInfo(), EltVT, false, false, 0);
}
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -1118,7 +1115,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
bool isTruncating = N->isTruncatingStore();
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
- int SVOffset = N->getSrcValueOffset();
EVT MemoryVT = N->getMemoryVT();
unsigned Alignment = N->getOriginalAlignment();
bool isVol = N->isVolatile();
@@ -1132,22 +1128,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
if (isTruncating)
- Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
LoMemVT, isVol, isNT, Alignment);
else
- Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
isVol, isNT, Alignment);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- SVOffset += IncrementSize;
if (isTruncating)
- Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
HiMemVT, isVol, isNT, Alignment);
else
- Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
isVol, isNT, Alignment);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
@@ -1155,7 +1152,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
DebugLoc DL = N->getDebugLoc();
-
+
// The input operands all must have the same type, and we know the result
// type is valid. Convert this to a buildvector which extracts all the
// input elements.
@@ -1172,11 +1169,29 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
}
}
-
+
return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0),
&Elts[0], Elts.size());
}
+SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ DebugLoc DL = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorNumElements());
+
+ Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+}
+
+
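[Reviewer note] The new FP_ROUND splitting covers cases such as a legal v4f32 result whose v4f64 input is illegal (a hedged example target). Sketched as a DAG trace:

    //   v4f32 = fp_round v4f64 X, 0
    // becomes
    //   v2f32 lo = fp_round (lo half of X), 0
    //   v2f32 hi = fp_round (hi half of X), 0
    //   v4f32    = concat_vectors lo, hi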
//===----------------------------------------------------------------------===//
// Result Vector Widening
@@ -1201,7 +1216,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to widen the result of this operator!");
- case ISD::BIT_CONVERT: Res = WidenVecRes_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break;
@@ -1297,7 +1312,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
EVT WidenEltVT = WidenVT.getVectorElementType();
EVT VT = WidenVT;
unsigned NumElts = VT.getVectorNumElements();
- while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) {
+ while (!TLI.isTypeLegal(VT) && NumElts != 1) {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
}
@@ -1308,11 +1323,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
}
-
+
// No legal vector version so unroll the vector operation and then widen.
if (NumElts == 1)
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
-
+
// Since the operation can trap, apply the operation on the original vector.
EVT MaxVT = VT;
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
@@ -1323,7 +1338,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
unsigned ConcatEnd = 0; // Current ConcatOps index.
int Idx = 0; // Current Idx into input vectors.
- // NumElts := greatest synthesizable vector size (at most WidenVT)
+ // NumElts := greatest legal vector size (at most WidenVT)
// while (orig. vector has unhandled elements) {
// take munches of size NumElts from the beginning and add to ConcatOps
// NumElts := next smaller supported vector size or 1
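
[Reviewer note] A hedged trace of that loop, assuming a target where v4i32 and v2i32 are legal but nothing wider is: widening (add v6i32 A, B) to v8i32 proceeds as

    //   v4i32 r0 = add A[0..3], B[0..3]   // munch of NumElts = 4
    //   v2i32 r1 = add A[4..5], B[4..5]   // next size down, NumElts = 2
    //   v8i32 R  = concat r0, r1, <2 x i32 undef padding>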
@@ -1341,13 +1356,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
do {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
- } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1);
+ } while (!TLI.isTypeLegal(VT) && NumElts != 1);
if (NumElts == 1) {
for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
- SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
InOp1, DAG.getIntPtrConstant(Idx));
- SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
InOp2, DAG.getIntPtrConstant(Idx));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
EOp1, EOp2);
@@ -1378,7 +1393,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
do {
NextSize *= 2;
NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
- } while (!TLI.isTypeSynthesizable(NextVT));
+ } while (!TLI.isTypeLegal(NextVT));
if (!VT.isVector()) {
// Scalar type: create an INSERT_VECTOR_ELT of type NextVT
@@ -1415,7 +1430,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
if (VT == WidenVT)
return ConcatOps[0];
}
-
+
// add undefs of size MaxVT until ConcatOps grows to length of WidenVT
unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
if (NumOps != ConcatEnd ) {
@@ -1428,7 +1443,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDValue InOp = N->getOperand(0);
- DebugLoc dl = N->getDebugLoc();
+ DebugLoc DL = N->getDebugLoc();
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
@@ -1444,11 +1459,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
InVTNumElts = InVT.getVectorNumElements();
- if (InVTNumElts == WidenNumElts)
- return DAG.getNode(Opcode, dl, WidenVT, InOp);
+ if (InVTNumElts == WidenNumElts) {
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InOp);
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1));
+ }
}
- if (TLI.isTypeSynthesizable(InWidenVT)) {
+ if (TLI.isTypeLegal(InWidenVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1462,16 +1480,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDValue UndefVal = DAG.getUNDEF(InVT);
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(Opcode, dl, WidenVT,
- DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT,
- &Ops[0], NumConcat));
+ SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT,
+ &Ops[0], NumConcat);
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVec);
+ return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1));
}
if (InVTNumElts % WidenNumElts == 0) {
+ SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT,
+ InOp, DAG.getIntPtrConstant(0));
// Extract the input and convert the shortened input vector.
- return DAG.getNode(Opcode, dl, WidenVT,
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT,
- InOp, DAG.getIntPtrConstant(0)));
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVal);
+ return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1));
}
}
@@ -1480,16 +1502,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
EVT EltVT = WidenVT.getVectorElementType();
unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
unsigned i;
- for (i=0; i < MinElts; ++i)
- Ops[i] = DAG.getNode(Opcode, dl, EltVT,
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
- DAG.getIntPtrConstant(i)));
+ for (i=0; i < MinElts; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getIntPtrConstant(i));
+ if (N->getNumOperands() == 1)
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
+ else
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1));
+ }
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, &Ops[0], WidenNumElts);
}
SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
@@ -1536,7 +1562,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
WidenVT, WidenLHS, DAG.getValueType(ExtVT));
}
-SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
SDValue InOp = N->getOperand(0);
EVT InVT = InOp.getValueType();
EVT VT = N->getValueType(0);
@@ -1555,7 +1581,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
InOp = GetPromotedInteger(InOp);
InVT = InOp.getValueType();
if (WidenVT.bitsEq(InVT))
- return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
break;
case SoftenFloat:
case ExpandInteger:
@@ -1570,13 +1596,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
InVT = InOp.getValueType();
if (WidenVT.bitsEq(InVT))
// The input widens to the same size. Convert to the widen value.
- return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
break;
}
unsigned WidenSize = WidenVT.getSizeInBits();
unsigned InSize = InVT.getSizeInBits();
- if (WidenSize % InSize == 0) {
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {
// Determine the new input vector type. The new input vector type will use
// the same element type (if it's a vector) or use the input type as a
// vector. It is the same size as the type to widen to.
@@ -1590,7 +1617,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
}
- if (TLI.isTypeSynthesizable(NewInVT)) {
+ if (TLI.isTypeLegal(NewInVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1609,7 +1636,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
else
NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
NewInVT, &Ops[0], NewNumElts);
- return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, NewVec);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
}
}
@@ -1730,7 +1757,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
SatOp, CvtCode);
}
- if (TLI.isTypeSynthesizable(InWidenVT)) {
+ if (TLI.isTypeLegal(InWidenVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1794,39 +1821,25 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT InVT = InOp.getValueType();
- ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx);
- if (CIdx) {
- unsigned IdxVal = CIdx->getZExtValue();
- // Check if we can just return the input vector after widening.
- if (IdxVal == 0 && InVT == WidenVT)
- return InOp;
-
- // Check if we can extract from the vector.
- unsigned InNumElts = InVT.getVectorNumElements();
- if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
- }
+ // Check if we can just return the input vector after widening.
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal == 0 && InVT == WidenVT)
+ return InOp;
+
+ // Check if we can extract from the vector.
+ unsigned InNumElts = InVT.getVectorNumElements();
+ if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
// We could try widening the input to the right length but for now, extract
// the original elements, fill the rest with undefs and build a vector.
SmallVector<SDValue, 16> Ops(WidenNumElts);
EVT EltVT = VT.getVectorElementType();
- EVT IdxVT = Idx.getValueType();
unsigned NumElts = VT.getVectorNumElements();
unsigned i;
- if (CIdx) {
- unsigned IdxVal = CIdx->getZExtValue();
- for (i=0; i < NumElts; ++i)
- Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(IdxVal+i, IdxVT));
- } else {
- Ops[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, Idx);
- for (i=1; i < NumElts; ++i) {
- SDValue NewIdx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
- DAG.getConstant(i, IdxVT));
- Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, NewIdx);
- }
- }
+ for (i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(IdxVal+i));
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
@@ -1985,7 +1998,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to widen this operator's operand!");
- case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
@@ -2044,7 +2057,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
}
-SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue InOp = GetWidenedVector(N->getOperand(0));
EVT InWidenVT = InOp.getValueType();
@@ -2053,11 +2066,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) {
// Check if we can convert between two legal vector types and extract.
unsigned InWidenSize = InWidenVT.getSizeInBits();
unsigned Size = VT.getSizeInBits();
- if (InWidenSize % Size == 0 && !VT.isVector()) {
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) {
unsigned NewNumElts = InWidenSize / Size;
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
- if (TLI.isTypeSynthesizable(NewVT)) {
- SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp);
+ if (TLI.isTypeLegal(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
DAG.getIntPtrConstant(0));
}
@@ -2146,7 +2160,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
if (Width == WidenEltWidth)
return RetVT;
- // See if there is larger legal integer than the element type to load/store
+ // See if there is a larger legal integer than the element type to load/store
unsigned VT;
for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
@@ -2154,7 +2168,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
unsigned MemVTWidth = MemVT.getSizeInBits();
if (MemVT.getSizeInBits() <= WidenEltWidth)
break;
- if (TLI.isTypeSynthesizable(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+ if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
RetVT = MemVT;
@@ -2168,7 +2182,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
EVT MemVT = (MVT::SimpleValueType) VT;
unsigned MemVTWidth = MemVT.getSizeInBits();
- if (TLI.isTypeSynthesizable(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+ if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
(WidenWidth % MemVTWidth) == 0 &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
@@ -2201,7 +2215,7 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
if (NewLdTy != LdTy) {
NumElts = Width / NewLdTy.getSizeInBits();
NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
- VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+ VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
// Readjust index and vector position based on the new load type
Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
LdTy = NewLdTy;
@@ -2209,11 +2223,11 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
DAG.getIntPtrConstant(Idx++));
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, VecTy, VecOp);
+ return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
}
-SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
- LoadSDNode * LD) {
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
+ LoadSDNode *LD) {
// The strategy assumes that we can efficiently load power-of-two widths.
// The routine chops the vector into the largest vector loads with the same
// element type, or scalar loads, and then recombines them into the widened vector
@@ -2228,11 +2242,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
// Load information
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
- int SVOffset = LD->getSrcValueOffset();
unsigned Align = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
- const Value *SV = LD->getSrcValue();
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth; // Difference
@@ -2241,7 +2253,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
// Find the vector type that can load from.
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
int NewVTWidth = NewVT.getSizeInBits();
- SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset,
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
isVolatile, isNonTemporal, Align);
LdChain.push_back(LdOp.getValue(1));
@@ -2251,7 +2263,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
unsigned NumElts = WidenWidth / NewVTWidth;
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
- return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
}
if (NewVT == WidenVT)
return LdOp;
@@ -2286,8 +2298,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
NewVTWidth = NewVT.getSizeInBits();
}
- SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV,
- SVOffset+Offset, isVolatile,
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ isVolatile,
isNonTemporal, MinAlign(Align, Increment));
LdChain.push_back(LdOp.getValue(1));
LdOps.push_back(LdOp);
@@ -2300,7 +2313,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
if (!LdOps[0].getValueType().isVector())
// All the loads are scalar loads.
return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
-
+
// If the loads contain vectors, build the result using concat vector.
// All of the vector loads are powers of two in width, and the scalar
// loads can be combined to make a power-of-two vector.
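
[Reviewer note] A hedged trace of the recombination for a v3i32 load widened to v4i32; the exact piece sizes depend on what FindMemType reports legal, here assumed to be v2i32 and i32:

    //   v2i32 l0 = load [p]            // first 64 bits
    //   i32   l1 = load [p + 8]        // remaining 32 bits
    //   v2i32 v1 = scalar_to_vector l1 // scalar piece rebuilt as a vector
    //   v4i32    = concat_vectors l0, v1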
@@ -2362,11 +2375,9 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
// Load information
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
- int SVOffset = LD->getSrcValueOffset();
unsigned Align = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
- const Value *SV = LD->getSrcValue();
EVT EltVT = WidenVT.getVectorElementType();
EVT LdEltVT = LdVT.getVectorElementType();
@@ -2376,16 +2387,17 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(WidenNumElts);
unsigned Increment = LdEltVT.getSizeInBits() / 8;
- Ops[0] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, BasePtr, SV, SVOffset,
+ Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr,
+ LD->getPointerInfo(),
LdEltVT, isVolatile, isNonTemporal, Align);
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
BasePtr, DAG.getIntPtrConstant(Offset));
- Ops[i] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, NewBasePtr, SV,
- SVOffset + Offset, LdEltVT, isVolatile,
- isNonTemporal, Align);
+ Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
+ isVolatile, isNonTemporal, Align);
LdChain.push_back(Ops[i].getValue(1));
}
@@ -2405,8 +2417,6 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
// element type or scalar stores.
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
- const Value *SV = ST->getSrcValue();
- int SVOffset = ST->getSrcValueOffset();
unsigned Align = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
@@ -2433,9 +2443,9 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
do {
SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
DAG.getIntPtrConstant(Idx));
- StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
- SVOffset + Offset, isVolatile,
- isNonTemporal,
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ isVolatile, isNonTemporal,
MinAlign(Align, Offset)));
StWidth -= NewVTWidth;
Offset += Increment;
@@ -2447,15 +2457,16 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
// Cast the vector to the scalar type we can store
unsigned NumElts = ValWidth / NewVTWidth;
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
- SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp);
+ SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
// Readjust index position based on new vector type
Idx = Idx * ValEltWidth / NewVTWidth;
do {
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
DAG.getIntPtrConstant(Idx++));
- StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
- SVOffset + Offset, isVolatile,
- isNonTemporal, MinAlign(Align, Offset)));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ isVolatile, isNonTemporal,
+ MinAlign(Align, Offset)));
StWidth -= NewVTWidth;
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
@@ -2474,14 +2485,12 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
// and then store it. Instead, we extract each element and then store it.
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
- const Value *SV = ST->getSrcValue();
- int SVOffset = ST->getSrcValueOffset();
unsigned Align = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
SDValue ValOp = GetWidenedVector(ST->getValue());
DebugLoc dl = ST->getDebugLoc();
-
+
EVT StVT = ST->getMemoryVT();
EVT ValVT = ValOp.getValueType();
@@ -2499,8 +2508,8 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
unsigned NumElts = StVT.getVectorNumElements();
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
DAG.getIntPtrConstant(0));
- StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
- SVOffset, StEltVT,
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo(), StEltVT,
isVolatile, isNonTemporal, Align));
unsigned Offset = Increment;
for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
@@ -2508,9 +2517,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
BasePtr, DAG.getIntPtrConstant(Offset));
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
DAG.getIntPtrConstant(0));
- StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
- SVOffset + Offset, StEltVT,
- isVolatile, isNonTemporal,
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ StEltVT, isVolatile, isNonTemporal,
MinAlign(Align, Offset)));
}
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index ac2d338..2dcb229 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -16,7 +16,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DebugLoc.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
namespace llvm {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index fae2729..e3da208 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -205,7 +205,7 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
- if (SU->getNode()->getFlaggedNode())
+ if (SU->getNode()->getGluedNode())
return NULL;
SDNode *N = SU->getNode();
@@ -216,7 +216,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
bool TryUnfold = false;
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return NULL;
else if (VT == MVT::Other)
TryUnfold = true;
@@ -224,7 +224,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
const SDValue &Op = N->getOperand(i);
EVT VT = Op.getNode()->getValueType(Op.getResNo());
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return NULL;
}
@@ -476,12 +476,12 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
}
}
- for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
if (Node->getOpcode() == ISD::INLINEASM) {
// Inline asm can clobber physical defs.
unsigned NumOps = Node->getNumOperands();
- if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
- --NumOps; // Ignore the flag operand.
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
index 56f5ded..430283d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -40,7 +40,7 @@ STATISTIC(NumStalls, "Number of pipeline stalls");
static RegisterScheduler
tdListDAGScheduler("list-td", "Top-down list scheduler",
createTDListDAGScheduler);
-
+
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGList - The actual list scheduler implementation. This supports
@@ -51,7 +51,7 @@ private:
/// AvailableQueue - The priority queue to use for the available SUnits.
///
SchedulingPriorityQueue *AvailableQueue;
-
+
/// PendingQueue - This contains all of the instructions whose operands have
/// been issued, but their results are not ready yet (due to the latency of
/// the operation). Once the operands become available, the instruction is
@@ -63,11 +63,12 @@ private:
public:
ScheduleDAGList(MachineFunction &mf,
- SchedulingPriorityQueue *availqueue,
- ScheduleHazardRecognizer *HR)
- : ScheduleDAGSDNodes(mf),
- AvailableQueue(availqueue), HazardRec(HR) {
- }
+ SchedulingPriorityQueue *availqueue)
+ : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) {
+
+ const TargetMachine &tm = mf.getTarget();
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ }
~ScheduleDAGList() {
delete HazardRec;
@@ -87,14 +88,14 @@ private:
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGList::Schedule() {
DEBUG(dbgs() << "********** List Scheduling **********\n");
-
+
// Build the scheduling graph.
BuildSchedGraph(NULL);
AvailableQueue->initNodes(SUnits);
-
+
ListScheduleTopDown();
-
+
AvailableQueue->releaseState();
}
@@ -118,7 +119,7 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
--SuccSU->NumPredsLeft;
SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
-
+
// If all the node's predecessors are scheduled, this node is ready
// to be scheduled. Ignore the special ExitSU node.
if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
@@ -142,7 +143,7 @@ void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) {
void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
-
+
Sequence.push_back(SU);
assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
SU->setDepthToAtLeast(CurCycle);
@@ -168,7 +169,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
SUnits[i].isAvailable = true;
}
}
-
+
// While Available queue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
std::vector<SUnit*> NotReady;
@@ -187,7 +188,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
}
}
-
+
// If there are no instructions available, don't try to issue anything, and
// don't advance the hazard recognizer.
if (AvailableQueue->empty()) {
@@ -196,24 +197,24 @@ void ScheduleDAGList::ListScheduleTopDown() {
}
SUnit *FoundSUnit = 0;
-
+
bool HasNoopHazards = false;
while (!AvailableQueue->empty()) {
SUnit *CurSUnit = AvailableQueue->pop();
-
+
ScheduleHazardRecognizer::HazardType HT =
- HazardRec->getHazardType(CurSUnit);
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
if (HT == ScheduleHazardRecognizer::NoHazard) {
FoundSUnit = CurSUnit;
break;
}
-
+
// Remember if this is a noop hazard.
HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
-
+
NotReady.push_back(CurSUnit);
}
-
+
// Add the nodes that aren't ready back onto the available list.
if (!NotReady.empty()) {
AvailableQueue->push_all(NotReady);
@@ -228,7 +229,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
// If this is a pseudo-op node, we don't want to increment the current
// cycle.
if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
- ++CurCycle;
+ ++CurCycle;
} else if (!HasNoopHazards) {
// Otherwise, we have a pipeline stall, but no other problem, just advance
// the current cycle and try again.
@@ -257,12 +258,8 @@ void ScheduleDAGList::ListScheduleTopDown() {
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-/// createTDListDAGScheduler - This creates a top-down list scheduler with a
-/// new hazard recognizer. This scheduler takes ownership of the hazard
-/// recognizer and deletes it when done.
+/// createTDListDAGScheduler - This creates a top-down list scheduler.
ScheduleDAGSDNodes *
llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
- return new ScheduleDAGList(*IS->MF,
- new LatencyPriorityQueue(),
- IS->CreateTargetHazardRecognizer());
+ return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue());
}
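The constructor change above moves hazard-recognizer creation behind
TargetInstrInfo::CreateTargetHazardRecognizer. A sketch of a target-side
override, where FooInstrInfo is a hypothetical target class and the
ScoreboardHazardRecognizer constructor is assumed from the headers of this
era:

    #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"

    ScheduleHazardRecognizer *FooInstrInfo::
    CreateTargetHazardRecognizer(const TargetMachine *TM,
                                 const ScheduleDAG *DAG) const {
      // Model pipeline hazards with the target's instruction itineraries;
      // returning a plain ScheduleHazardRecognizer instead would leave
      // isEnabled() false and disable hazard checking entirely.
      return new ScoreboardHazardRecognizer(TM->getInstrItineraryData(),
                                            DAG, "sched");
    }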
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 4c3e4e3..0b548b2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -20,6 +20,7 @@
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -65,6 +66,10 @@ static RegisterScheduler
"which tries to balance ILP and register pressure",
createILPListDAGScheduler);
+static cl::opt<bool> DisableSchedCycles(
+ "disable-sched-cycles", cl::Hidden, cl::init(false),
+ cl::desc("Disable cycle-level precision during preRA scheduling"));
+
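Being a cl::opt, the new flag is reachable from any tool that parses LLVM
command-line options; a hypothetical invocation, assuming the ILP scheduler
is registered under the name "list-ilp":

    llc -pre-RA-sched=list-ilp -disable-sched-cycles foo.bc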
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
@@ -83,31 +88,56 @@ private:
/// AvailableQueue - The priority queue to use for the available SUnits.
SchedulingPriorityQueue *AvailableQueue;
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+  /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// CurCycle - The current scheduler state corresponds to this cycle.
+ unsigned CurCycle;
+
+ /// MinAvailableCycle - Cycle of the soonest available instruction.
+ unsigned MinAvailableCycle;
+
   /// LiveRegDefs - A set of physical registers and their definitions
   /// that are "live". These nodes must be scheduled before any other nodes that
   /// modify the registers can be scheduled.
unsigned NumLiveRegs;
std::vector<SUnit*> LiveRegDefs;
- std::vector<unsigned> LiveRegCycles;
+ std::vector<SUnit*> LiveRegGens;
/// Topo - A topological ordering for SUnits which permits fast IsReachable
/// and similar queries.
ScheduleDAGTopologicalSort Topo;
public:
- ScheduleDAGRRList(MachineFunction &mf,
- bool isbottomup, bool needlatency,
- SchedulingPriorityQueue *availqueue)
- : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
- AvailableQueue(availqueue), Topo(SUnits) {
- }
+ ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
+ SchedulingPriorityQueue *availqueue,
+ CodeGenOpt::Level OptLevel)
+ : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
+ NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
+ Topo(SUnits) {
+
+ const TargetMachine &tm = mf.getTarget();
+ if (DisableSchedCycles || !NeedLatency)
+ HazardRec = new ScheduleHazardRecognizer();
+ else
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ }
~ScheduleDAGRRList() {
+ delete HazardRec;
delete AvailableQueue;
}
void Schedule();
+ ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
+
/// IsReachable - Checks if SU is reachable from TargetSU.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
return Topo.IsReachable(SU, TargetSU);
@@ -136,24 +166,37 @@ public:
}
private:
+ bool isReady(SUnit *SU) {
+ return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
+ AvailableQueue->isReady(SU);
+ }
+
void ReleasePred(SUnit *SU, const SDep *PredEdge);
- void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+ void ReleasePredecessors(SUnit *SU);
void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
void ReleaseSuccessors(SUnit *SU);
+ void ReleasePending();
+ void AdvanceToCycle(unsigned NextCycle);
+ void AdvancePastStalls(SUnit *SU);
+ void EmitNode(SUnit *SU);
+ void ScheduleNodeBottomUp(SUnit*);
void CapturePred(SDep *PredEdge);
- void ScheduleNodeBottomUp(SUnit*, unsigned);
- void ScheduleNodeTopDown(SUnit*, unsigned);
void UnscheduleNodeBottomUp(SUnit*);
- void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
+ void RestoreHazardCheckerBottomUp();
+ void BacktrackBottomUp(SUnit*, SUnit*);
SUnit *CopyAndMoveSuccessors(SUnit*);
void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
const TargetRegisterClass*,
const TargetRegisterClass*,
SmallVector<SUnit*, 2>&);
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
- void ListScheduleTopDown();
+
+ SUnit *PickNodeToScheduleBottomUp();
void ListScheduleBottomUp();
+ void ScheduleNodeTopDown(SUnit*);
+ void ListScheduleTopDown();
+
/// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
/// Updates the topological ordering if required.
@@ -190,11 +233,13 @@ private:
void ScheduleDAGRRList::Schedule() {
DEBUG(dbgs()
<< "********** List Scheduling BB#" << BB->getNumber()
- << " **********\n");
+ << " '" << BB->getName() << "' **********\n");
+ CurCycle = 0;
+ MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
NumLiveRegs = 0;
- LiveRegDefs.resize(TRI->getNumRegs(), NULL);
- LiveRegCycles.resize(TRI->getNumRegs(), 0);
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegGens.resize(TRI->getNumRegs(), NULL);
// Build the scheduling graph.
BuildSchedGraph(NULL);
@@ -204,13 +249,15 @@ void ScheduleDAGRRList::Schedule() {
Topo.InitDAGTopologicalSorting();
AvailableQueue->initNodes(SUnits);
-
+
+ HazardRec->Reset();
+
// Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
if (isBottomUp)
ListScheduleBottomUp();
else
ListScheduleTopDown();
-
+
AvailableQueue->releaseState();
}
@@ -243,33 +290,197 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
// to be scheduled. Ignore the special EntrySU node.
if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
PredSU->isAvailable = true;
- AvailableQueue->push(PredSU);
+
+ unsigned Height = PredSU->getHeight();
+ if (Height < MinAvailableCycle)
+ MinAvailableCycle = Height;
+
+    if (isReady(PredSU)) {
+ AvailableQueue->push(PredSU);
+ }
+ // CapturePred and others may have left the node in the pending queue, avoid
+ // adding it twice.
+ else if (!PredSU->isPending) {
+ PredSU->isPending = true;
+ PendingQueue.push_back(PredSU);
+ }
}
}
-void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+/// Call ReleasePred for each predecessor, then update register live def/gen.
+/// Always update LiveRegDefs for a register dependence even if the current SU
+/// also defines the register. This effectively creates one large live range
+/// across a sequence of two-address nodes. This is important because the
+/// entire chain must be scheduled together. Example:
+///
+/// flags = (3) add
+/// flags = (2) addc flags
+/// flags = (1) addc flags
+///
+/// results in
+///
+/// LiveRegDefs[flags] = 3
+/// LiveRegGens[flags] = 1
+///
+/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
+/// interference on flags.
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
// Bottom up: release predecessors
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
ReleasePred(SU, &*I);
if (I->isAssignedRegDep()) {
// This is a physical register dependency and it's impossible or
- // expensive to copy the register. Make sure nothing that can
+ // expensive to copy the register. Make sure nothing that can
// clobber the register is scheduled between the predecessor and
// this node.
- if (!LiveRegDefs[I->getReg()]) {
+ SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
+ assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
+ "interference on register dependence");
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ if (!LiveRegGens[I->getReg()]) {
++NumLiveRegs;
- LiveRegDefs[I->getReg()] = I->getSUnit();
- LiveRegCycles[I->getReg()] = CurCycle;
+ LiveRegGens[I->getReg()] = SU;
}
}
}
}
+/// Check to see if any of the pending instructions are ready to issue. If
+/// so, add them to the available queue.
+void ScheduleDAGRRList::ReleasePending() {
+ if (DisableSchedCycles) {
+ assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
+ return;
+ }
+
+ // If the available queue is empty, it is safe to reset MinAvailableCycle.
+ if (AvailableQueue->empty())
+ MinAvailableCycle = UINT_MAX;
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ unsigned ReadyCycle =
+ isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
+ if (ReadyCycle < MinAvailableCycle)
+ MinAvailableCycle = ReadyCycle;
+
+ if (PendingQueue[i]->isAvailable) {
+ if (!isReady(PendingQueue[i]))
+ continue;
+ AvailableQueue->push(PendingQueue[i]);
+ }
+ PendingQueue[i]->isPending = false;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ }
+}
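Note the removal idiom in the loop above: an element is deleted in O(1) by
overwriting it with the back of PendingQueue and popping, which is why both
the index and the end bound are decremented after each removal.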
+
+/// Move the scheduler state forward by the specified number of Cycles.
+void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
+ if (NextCycle <= CurCycle)
+ return;
+
+ AvailableQueue->setCurCycle(NextCycle);
+ if (!HazardRec->isEnabled()) {
+ // Bypass lots of virtual calls in case of long latency.
+ CurCycle = NextCycle;
+ }
+ else {
+ for (; CurCycle != NextCycle; ++CurCycle) {
+ if (isBottomUp)
+ HazardRec->RecedeCycle();
+ else
+ HazardRec->AdvanceCycle();
+ }
+ }
+ // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
+ // available Q to release pending nodes at least once before popping.
+ ReleasePending();
+}
+
+/// Move the scheduler state forward until the specified node's dependents are
+/// ready and can be scheduled with no resource conflicts.
+void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
+ if (DisableSchedCycles)
+ return;
+
+ unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
+
+ // Bump CurCycle to account for latency. We assume the latency of other
+ // available instructions may be hidden by the stall (not a full pipe stall).
+ // This updates the hazard recognizer's cycle before reserving resources for
+ // this instruction.
+ AdvanceToCycle(ReadyCycle);
+
+ // Calls are scheduled in their preceding cycle, so don't conflict with
+ // hazards from instructions after the call. EmitNode will reset the
+ // scoreboard state before emitting the call.
+ if (isBottomUp && SU->isCall)
+ return;
+
+ // FIXME: For resource conflicts in very long non-pipelined stages, we
+ // should probably skip ahead here to avoid useless scoreboard checks.
+ int Stalls = 0;
+ while (true) {
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
+
+ if (HT == ScheduleHazardRecognizer::NoHazard)
+ break;
+
+ ++Stalls;
+ }
+ AdvanceToCycle(CurCycle + Stalls);
+}
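To make the stall loop concrete, an illustrative bottom-up trace under an
assumed recognizer that reports hazards for the next two cycles:

    // getHazardType(SU,  0) == Hazard    -> Stalls = 1
    // getHazardType(SU, -1) == Hazard    -> Stalls = 2
    // getHazardType(SU, -2) == NoHazard  -> loop exits
    // AdvanceToCycle(CurCycle + 2) then recedes the recognizer twice before
    // EmitNode later reserves resources for SU.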
+
+/// Record this SUnit in the HazardRecognizer.
+/// Does not update CurCycle.
+void ScheduleDAGRRList::EmitNode(SUnit *SU) {
+ if (!HazardRec->isEnabled())
+ return;
+
+ // Check for phys reg copy.
+ if (!SU->getNode())
+ return;
+
+ switch (SU->getNode()->getOpcode()) {
+ default:
+ assert(SU->getNode()->isMachineOpcode() &&
+ "This target-independent node should not be scheduled.");
+ break;
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor:
+ case ISD::CopyToReg:
+ case ISD::CopyFromReg:
+ case ISD::EH_LABEL:
+ // Noops don't affect the scoreboard state. Copies are likely to be
+ // removed.
+ return;
+ case ISD::INLINEASM:
+ // For inline asm, clear the pipeline state.
+ HazardRec->Reset();
+ return;
+ }
+ if (isBottomUp && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+
+ HazardRec->EmitInstruction(SU);
+
+ if (!isBottomUp && SU->isCall) {
+ HazardRec->Reset();
+ }
+}
+
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
-void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
@@ -278,36 +489,51 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n");
#endif
- // FIXME: Handle noop hazard.
+ // FIXME: Do not modify node height. It may interfere with
+ // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
+  // node, its ready cycle can aid heuristics, and after scheduling it can
+ // indicate the scheduled cycle.
SU->setHeightToAtLeast(CurCycle);
+
+  // Reserve resources for the scheduled instruction.
+ EmitNode(SU);
+
Sequence.push_back(SU);
AvailableQueue->ScheduledNode(SU);
- ReleasePredecessors(SU, CurCycle);
+ // Update liveness of predecessors before successors to avoid treating a
+ // two-address node as a live range def.
+ ReleasePredecessors(SU);
// Release all the implicit physical register defs that are live.
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
- if (I->isAssignedRegDep()) {
- if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
- assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
- assert(LiveRegDefs[I->getReg()] == SU &&
- "Physical register dependency violated?");
- --NumLiveRegs;
- LiveRegDefs[I->getReg()] = NULL;
- LiveRegCycles[I->getReg()] = 0;
- }
+    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
+ if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegGens[I->getReg()] = NULL;
}
}
SU->isScheduled = true;
+
+ // Conditions under which the scheduler should eagerly advance the cycle:
+ // (1) No available instructions
+ // (2) All pipelines full, so available instructions must have hazards.
+ //
+ // If HazardRec is disabled, count each inst as one cycle.
+ if (!HazardRec->isEnabled() || HazardRec->atIssueLimit()
+ || AvailableQueue->empty())
+ AdvanceToCycle(CurCycle + 1);
}
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
-void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
SUnit *PredSU = PredEdge->getSUnit();
if (PredSU->isAvailable) {
PredSU->isAvailable = false;
@@ -328,59 +554,98 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
CapturePred(&*I);
- if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){
+ if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
"Physical register dependency violated?");
--NumLiveRegs;
LiveRegDefs[I->getReg()] = NULL;
- LiveRegCycles[I->getReg()] = 0;
+ LiveRegGens[I->getReg()] = NULL;
}
}
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
if (I->isAssignedRegDep()) {
+      // This becomes the nearest def. Note that an earlier def may still be
+      // pending if this is a two-address node.
       if (!LiveRegDefs[I->getReg()]) {
-        LiveRegDefs[I->getReg()] = SU;
         ++NumLiveRegs;
       }
+      LiveRegDefs[I->getReg()] = SU;
- if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
- LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
+ if (LiveRegGens[I->getReg()] == NULL ||
+ I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
+ LiveRegGens[I->getReg()] = I->getSUnit();
}
}
+ if (SU->getHeight() < MinAvailableCycle)
+ MinAvailableCycle = SU->getHeight();
SU->setHeightDirty();
SU->isScheduled = false;
SU->isAvailable = true;
- AvailableQueue->push(SU);
+ if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
+ // Don't make available until backtracking is complete.
+ SU->isPending = true;
+ PendingQueue.push_back(SU);
+ }
+ else {
+ AvailableQueue->push(SU);
+ }
AvailableQueue->UnscheduledNode(SU);
}
+/// After backtracking, the hazard checker needs to be restored to a state
+/// corresponding to the current cycle.
+void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
+ HazardRec->Reset();
+
+ unsigned LookAhead = std::min((unsigned)Sequence.size(),
+ HazardRec->getMaxLookAhead());
+ if (LookAhead == 0)
+ return;
+
+ std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
+ unsigned HazardCycle = (*I)->getHeight();
+ for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
+ SUnit *SU = *I;
+ for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
+ HazardRec->RecedeCycle();
+ }
+ EmitNode(SU);
+ }
+}
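For example (illustrative numbers): with getMaxLookAhead() == 4 and seven
nodes already in Sequence, only the last four are replayed; the recognizer
is receded once per cycle of height difference between consecutive replayed
nodes, and each node is then re-emitted through EmitNode.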
+
/// BacktrackBottomUp - Backtrack scheduling, unscheduling nodes up to and
/// including BtSU, in order to schedule the node SU.
-void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
- unsigned &CurCycle) {
- SUnit *OldSU = NULL;
- while (CurCycle > BtCycle) {
- OldSU = Sequence.back();
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
+ SUnit *OldSU = Sequence.back();
+ while (true) {
Sequence.pop_back();
if (SU->isSucc(OldSU))
// Don't try to remove SU from AvailableQueue.
SU->isAvailable = false;
+ // FIXME: use ready cycle instead of height
+ CurCycle = OldSU->getHeight();
UnscheduleNodeBottomUp(OldSU);
- --CurCycle;
AvailableQueue->setCurCycle(CurCycle);
+ if (OldSU == BtSU)
+ break;
+ OldSU = Sequence.back();
}
assert(!SU->isSucc(OldSU) && "Something is wrong!");
+ RestoreHazardCheckerBottomUp();
+
+ ReleasePending();
+
++NumBacktracks;
}
static bool isOperandOf(const SUnit *SU, SDNode *N) {
for (const SDNode *SUNode = SU->getNode(); SUNode;
- SUNode = SUNode->getFlaggedNode()) {
+ SUNode = SUNode->getGluedNode()) {
if (SUNode->isOperandOf(N))
return true;
}
@@ -390,18 +655,18 @@ static bool isOperandOf(const SUnit *SU, SDNode *N) {
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
- if (SU->getNode()->getFlaggedNode())
- return NULL;
-
SDNode *N = SU->getNode();
if (!N)
return NULL;
+ if (SU->getNode()->getGluedNode())
+ return NULL;
+
SUnit *NewSU;
bool TryUnfold = false;
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return NULL;
else if (VT == MVT::Other)
TryUnfold = true;
@@ -409,7 +674,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
const SDValue &Op = N->getOperand(i);
EVT VT = Op.getNode()->getValueType(Op.getResNo());
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return NULL;
}
@@ -441,13 +706,15 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
} else {
LoadSU = CreateNewSUnit(LoadNode);
LoadNode->setNodeId(LoadSU->NodeNum);
+
+ InitNumRegDefsLeft(LoadSU);
ComputeLatency(LoadSU);
}
SUnit *NewSU = CreateNewSUnit(N);
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NewSU->NodeNum);
-
+
const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
@@ -457,6 +724,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
}
if (TID.isCommutable())
NewSU->isCommutable = true;
+
+ InitNumRegDefsLeft(NewSU);
ComputeLatency(NewSU);
// Record all the edges to and from the old SU, by category.
@@ -507,6 +776,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
RemovePred(SuccDep, D);
D.setSUnit(NewSU);
AddPred(SuccDep, D);
+ // Balance register pressure.
+ if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
+ && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
+ --NewSU->NumRegDefsLeft;
}
for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
SDep D = ChainSuccs[i];
@@ -517,7 +790,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
D.setSUnit(LoadSU);
AddPred(SuccDep, D);
}
- }
+ }
// Add a data dependency to reflect that NewSU reads the value defined
// by LoadSU.
@@ -633,52 +906,52 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
/// CheckForLiveRegDef - Update the live register vector if the specified
/// register def of the specified SUnit clobbers any "live" registers.
-static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
std::vector<SUnit*> &LiveRegDefs,
SmallSet<unsigned, 4> &RegAdded,
SmallVector<unsigned, 4> &LRegs,
const TargetRegisterInfo *TRI) {
- bool Added = false;
- if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
- if (RegAdded.insert(Reg)) {
+ for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
+
+    // Check if this overlapping register is live.
+    if (!LiveRegDefs[*AliasI]) continue;
+
+    // Allow multiple uses of the same def.
+    if (LiveRegDefs[*AliasI] == SU) continue;
+
+    // Add the overlapping register to the set of interfering live regs.
+    if (RegAdded.insert(*AliasI))
-      LRegs.push_back(Reg);
+      LRegs.push_back(*AliasI);
- Added = true;
- }
}
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
- if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
- if (RegAdded.insert(*Alias)) {
- LRegs.push_back(*Alias);
- Added = true;
- }
- }
- return Added;
}
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
-bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
- SmallVector<unsigned, 4> &LRegs){
+bool ScheduleDAGRRList::
+DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
if (NumLiveRegs == 0)
return false;
SmallSet<unsigned, 4> RegAdded;
// If this node would clobber any "live" register, then it's not ready.
+ //
+ // If SU is the currently live definition of the same register that it uses,
+ // then we are free to schedule it.
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
- if (I->isAssignedRegDep())
+ if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
RegAdded, LRegs, TRI);
}
- for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
if (Node->getOpcode() == ISD::INLINEASM) {
// Inline asm can clobber physical defs.
unsigned NumOps = Node->getNumOperands();
- if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
- --NumOps; // Ignore the flag operand.
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
@@ -708,17 +981,151 @@ bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
+
return !LRegs.empty();
}
+/// Return a node that can be scheduled in this cycle. Requirements:
+/// (1) Ready: latency has been satisfied
+/// (2) No Hazards: resources are available
+/// (3) No Interferences: may unschedule to break register interferences.
+SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
+ SmallVector<SUnit*, 4> Interferences;
+ DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+
+ SUnit *CurSU = AvailableQueue->pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ Interferences.push_back(CurSU);
+ CurSU = AvailableQueue->pop();
+ }
+ if (CurSU) {
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ Interferences[i]->isPending = false;
+ assert(Interferences[i]->isAvailable && "must still be available");
+ AvailableQueue->push(Interferences[i]);
+ }
+ return CurSU;
+ }
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try backtracking, code duplication, or inserting cross class copies
+ // to resolve it.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ SUnit *TrySU = Interferences[i];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+
+ // Try unscheduling up to the point where it's safe to schedule
+ // this node.
+ SUnit *BtSU = NULL;
+ unsigned LiveCycle = UINT_MAX;
+ for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
+ unsigned Reg = LRegs[j];
+ if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
+ BtSU = LiveRegGens[Reg];
+ LiveCycle = BtSU->getHeight();
+ }
+ }
+ if (!WillCreateCycle(TrySU, BtSU)) {
+ BacktrackBottomUp(TrySU, BtSU);
+
+ // Force the current node to be scheduled before the node that
+ // requires the physical reg dep.
+ if (BtSU->isAvailable) {
+ BtSU->isAvailable = false;
+ if (!BtSU->isPending)
+ AvailableQueue->remove(BtSU);
+ }
+ AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true));
+
+      // If one or more successors have been unscheduled, then the current
+      // node is no longer available. Schedule a successor that's now
+ // available instead.
+ if (!TrySU->isAvailable) {
+ CurSU = AvailableQueue->pop();
+ }
+ else {
+ CurSU = TrySU;
+ TrySU->isPending = false;
+ Interferences.erase(Interferences.begin()+i);
+ }
+ break;
+ }
+ }
+
+ if (!CurSU) {
+ // Can't backtrack. If it's too expensive to copy the value, then try
+    // duplicating the nodes that produce these "too expensive to copy"
+ // values to break the dependency. In case even that doesn't work,
+ // insert cross class copies.
+ // If it's not too expensive, i.e. cost != -1, issue copies.
+ SUnit *TrySU = Interferences[0];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+    // If the cross copy register class is null, then it must be possible to
+    // copy the value directly. Do not try to duplicate the def.
+ SUnit *NewDef = 0;
+ if (DestRC)
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ else
+ DestRC = RC;
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ NewDef = Copies.back();
+ }
+
+ DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+
+ assert(CurSU && "Unable to resolve live physical register dependencies!");
+
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ Interferences[i]->isPending = false;
+ // May no longer be available due to backtracking.
+ if (Interferences[i]->isAvailable) {
+ AvailableQueue->push(Interferences[i]);
+ }
+ }
+ return CurSU;
+}
/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
- unsigned CurCycle = 0;
-
// Release any predecessors of the special Exit node.
- ReleasePredecessors(&ExitSU, CurCycle);
+ ReleasePredecessors(&ExitSU);
// Add root to Available queue.
if (!SUnits.empty()) {
@@ -730,135 +1137,29 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
// While Available queue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
- SmallVector<SUnit*, 4> NotReady;
- DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
Sequence.reserve(SUnits.size());
while (!AvailableQueue->empty()) {
- bool Delayed = false;
- LRegsMap.clear();
- SUnit *CurSU = AvailableQueue->pop();
- while (CurSU) {
- SmallVector<unsigned, 4> LRegs;
- if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
- break;
- Delayed = true;
- LRegsMap.insert(std::make_pair(CurSU, LRegs));
+ DEBUG(dbgs() << "\n*** Examining Available\n";
+ AvailableQueue->dump(this));
- CurSU->isPending = true; // This SU is not in AvailableQueue right now.
- NotReady.push_back(CurSU);
- CurSU = AvailableQueue->pop();
- }
+ // Pick the best node to schedule taking all constraints into
+ // consideration.
+ SUnit *SU = PickNodeToScheduleBottomUp();
- // All candidates are delayed due to live physical reg dependencies.
- // Try backtracking, code duplication, or inserting cross class copies
- // to resolve it.
- if (Delayed && !CurSU) {
- for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
- SUnit *TrySU = NotReady[i];
- SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
-
- // Try unscheduling up to the point where it's safe to schedule
- // this node.
- unsigned LiveCycle = CurCycle;
- for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
- unsigned Reg = LRegs[j];
- unsigned LCycle = LiveRegCycles[Reg];
- LiveCycle = std::min(LiveCycle, LCycle);
- }
- SUnit *OldSU = Sequence[LiveCycle];
- if (!WillCreateCycle(TrySU, OldSU)) {
- BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
- // Force the current node to be scheduled before the node that
- // requires the physical reg dep.
- if (OldSU->isAvailable) {
- OldSU->isAvailable = false;
- AvailableQueue->remove(OldSU);
- }
- AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false, /*isArtificial=*/true));
- // If one or more successors has been unscheduled, then the current
- // node is no longer avaialable. Schedule a successor that's now
- // available instead.
- if (!TrySU->isAvailable)
- CurSU = AvailableQueue->pop();
- else {
- CurSU = TrySU;
- TrySU->isPending = false;
- NotReady.erase(NotReady.begin()+i);
- }
- break;
- }
- }
+ AdvancePastStalls(SU);
- if (!CurSU) {
- // Can't backtrack. If it's too expensive to copy the value, then try
- // duplicate the nodes that produces these "too expensive to copy"
- // values to break the dependency. In case even that doesn't work,
- // insert cross class copies.
- // If it's not too expensive, i.e. cost != -1, issue copies.
- SUnit *TrySU = NotReady[0];
- SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
- assert(LRegs.size() == 1 && "Can't handle this yet!");
- unsigned Reg = LRegs[0];
- SUnit *LRDef = LiveRegDefs[Reg];
- EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
- const TargetRegisterClass *RC =
- TRI->getMinimalPhysRegClass(Reg, VT);
- const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
-
- // If cross copy register class is null, then it must be possible copy
- // the value directly. Do not try duplicate the def.
- SUnit *NewDef = 0;
- if (DestRC)
- NewDef = CopyAndMoveSuccessors(LRDef);
- else
- DestRC = RC;
- if (!NewDef) {
- // Issue copies, these can be expensive cross register class copies.
- SmallVector<SUnit*, 2> Copies;
- InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
- DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
- << " to SU #" << Copies.front()->NodeNum << "\n");
- AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
- NewDef = Copies.back();
- }
+ ScheduleNodeBottomUp(SU);
- DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
- << " to SU #" << TrySU->NodeNum << "\n");
- LiveRegDefs[Reg] = NewDef;
- AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
- TrySU->isAvailable = false;
- CurSU = NewDef;
- }
-
- assert(CurSU && "Unable to resolve live physical register dependencies!");
- }
-
- // Add the nodes that aren't ready back onto the available list.
- for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
- NotReady[i]->isPending = false;
- // May no longer be available due to backtracking.
- if (NotReady[i]->isAvailable)
- AvailableQueue->push(NotReady[i]);
+ while (AvailableQueue->empty() && !PendingQueue.empty()) {
+ // Advance the cycle to free resources. Skip ahead to the next ready SU.
+ assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
+ AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
}
- NotReady.clear();
-
- if (CurSU)
- ScheduleNodeBottomUp(CurSU, CurCycle);
- ++CurCycle;
- AvailableQueue->setCurCycle(CurCycle);
}
// Reverse the order if it is bottom up.
std::reverse(Sequence.begin(), Sequence.end());
-
+
#ifndef NDEBUG
VerifySchedule(isBottomUp);
#endif
@@ -905,7 +1206,7 @@ void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
-void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
@@ -921,7 +1222,6 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
- unsigned CurCycle = 0;
AvailableQueue->setCurCycle(CurCycle);
// Release any successors of the special Entry node.
@@ -935,19 +1235,19 @@ void ScheduleDAGRRList::ListScheduleTopDown() {
SUnits[i].isAvailable = true;
}
}
-
+
// While Available queue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
Sequence.reserve(SUnits.size());
while (!AvailableQueue->empty()) {
SUnit *CurSU = AvailableQueue->pop();
-
+
if (CurSU)
- ScheduleNodeTopDown(CurSU, CurCycle);
+ ScheduleNodeTopDown(CurSU);
++CurCycle;
AvailableQueue->setCurCycle(CurCycle);
}
-
+
#ifndef NDEBUG
VerifySchedule(isBottomUp);
#endif
@@ -955,70 +1255,288 @@ void ScheduleDAGRRList::ListScheduleTopDown() {
//===----------------------------------------------------------------------===//
-// RegReductionPriorityQueue Implementation
+// RegReductionPriorityQueue Definition
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
-//
+//
namespace {
- template<class SF>
- class RegReductionPriorityQueue;
-
- /// bu_ls_rr_sort - Priority function for bottom up register pressure
- // reduction scheduler.
- struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
- bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
- bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
-
- bool operator()(const SUnit* left, const SUnit* right) const;
+class RegReductionPQBase;
+
+struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
+};
+
+/// bu_ls_rr_sort - Priority function for bottom up register pressure
+// reduction scheduler.
+struct bu_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
};
- // td_ls_rr_sort - Priority function for top down register pressure reduction
- // scheduler.
- struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
- td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
- td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
-
- bool operator()(const SUnit* left, const SUnit* right) const;
+ RegReductionPQBase *SPQ;
+ bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+ bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// td_ls_rr_sort - Priority function for top down register pressure reduction
+// scheduler.
+struct td_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = false,
+ HasReadyFilter = false
};
- // src_ls_rr_sort - Priority function for source order scheduler.
- struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
- src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
- : SPQ(spq) {}
- src_ls_rr_sort(const src_ls_rr_sort &RHS)
- : SPQ(RHS.SPQ) {}
-
- bool operator()(const SUnit* left, const SUnit* right) const;
+ RegReductionPQBase *SPQ;
+ td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+ td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+};
+
+// src_ls_rr_sort - Priority function for source order scheduler.
+struct src_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
};
- // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
- struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
- hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
- : SPQ(spq) {}
- hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
- : SPQ(RHS.SPQ) {}
+ RegReductionPQBase *SPQ;
+ src_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ src_ls_rr_sort(const src_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
- bool operator()(const SUnit* left, const SUnit* right) const;
+// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
+struct hybrid_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = true
};
- // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
- // scheduler.
- struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ;
- ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq)
- : SPQ(spq) {}
- ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
- : SPQ(RHS.SPQ) {}
+ RegReductionPQBase *SPQ;
+ hybrid_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
- bool operator()(const SUnit* left, const SUnit* right) const;
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
+// scheduler.
+struct ilp_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = true
};
-} // end anonymous namespace
+
+ RegReductionPQBase *SPQ;
+ ilp_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+class RegReductionPQBase : public SchedulingPriorityQueue {
+protected:
+ std::vector<SUnit*> Queue;
+ unsigned CurQueueId;
+ bool TracksRegPressure;
+
+ // SUnits - The SUnits for the current graph.
+ std::vector<SUnit> *SUnits;
+
+ MachineFunction &MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+ ScheduleDAGRRList *scheduleDAG;
+
+ // SethiUllmanNumbers - The SethiUllman number for each node.
+ std::vector<unsigned> SethiUllmanNumbers;
+
+ /// RegPressure - Tracking current reg pressure per register class.
+ ///
+ std::vector<unsigned> RegPressure;
+
+ /// RegLimit - Tracking the number of allocatable registers per register
+ /// class.
+ std::vector<unsigned> RegLimit;
+
+public:
+ RegReductionPQBase(MachineFunction &mf,
+ bool hasReadyFilter,
+ bool tracksrp,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : SchedulingPriorityQueue(hasReadyFilter),
+ CurQueueId(0), TracksRegPressure(tracksrp),
+ MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
+ if (TracksRegPressure) {
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
+ }
+ }
+
+ void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+ scheduleDAG = scheduleDag;
+ }
+
+ ScheduleHazardRecognizer* getHazardRec() {
+ return scheduleDAG->getHazardRec();
+ }
+
+ void initNodes(std::vector<SUnit> &sunits);
+
+ void addNode(const SUnit *SU);
+
+ void updateNode(const SUnit *SU);
+
+ void releaseState() {
+ SUnits = 0;
+ SethiUllmanNumbers.clear();
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ }
+
+ unsigned getNodePriority(const SUnit *SU) const;
+
+ unsigned getNodeOrdering(const SUnit *SU) const {
+ return scheduleDAG->DAG->GetOrdering(SU->getNode());
+ }
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ assert(!U->NodeQueueId && "Node in the queue already");
+ U->NodeQueueId = ++CurQueueId;
+ Queue.push_back(U);
+ }
+
+ void remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ assert(SU->NodeQueueId != 0 && "Not in queue!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
+ SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
+ SU->NodeQueueId = 0;
+ }
+
+ bool tracksRegPressure() const { return TracksRegPressure; }
+
+ void dumpRegPressure() const;
+
+ bool HighRegPressure(const SUnit *SU) const;
+
+ bool MayReduceRegPressure(SUnit *SU);
+
+ void ScheduledNode(SUnit *SU);
+
+ void UnscheduledNode(SUnit *SU);
+
+protected:
+ bool canClobber(const SUnit *SU, const SUnit *Op);
+ void AddPseudoTwoAddrDeps();
+ void PrescheduleNodesWithMultipleUses();
+ void CalculateSethiUllmanNumbers();
+};
+
+template<class SF>
+class RegReductionPriorityQueue : public RegReductionPQBase {
+ static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) {
+ std::vector<SUnit *>::iterator Best = Q.begin();
+ for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
+ E = Q.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != prior(Q.end()))
+ std::swap(*Best, Q.back());
+ Q.pop_back();
+ return V;
+ }
+
+ SF Picker;
+
+public:
+ RegReductionPriorityQueue(MachineFunction &mf,
+ bool tracksrp,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
+ Picker(this) {}
+
+ bool isBottomUp() const { return SF::IsBottomUp; }
+
+ bool isReady(SUnit *U) const {
+ return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
+ }
+
+ SUnit *pop() {
+ if (Queue.empty()) return NULL;
+
+ SUnit *V = popFromQueue(Queue, Picker);
+ V->NodeQueueId = 0;
+ return V;
+ }
+
+ void dump(ScheduleDAG *DAG) const {
+ // Emulate pop() without clobbering NodeQueueIds.
+ std::vector<SUnit*> DumpQueue = Queue;
+ SF DumpPicker = Picker;
+ while (!DumpQueue.empty()) {
+ SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
+ if (isBottomUp())
+ dbgs() << "Height " << SU->getHeight() << ": ";
+ else
+ dbgs() << "Depth " << SU->getDepth() << ": ";
+ SU->dump(DAG);
+ }
+ }
+};
+
+typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+BURegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<td_ls_rr_sort>
+TDRegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<src_ls_rr_sort>
+SrcRegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
+HybridBURRPriorityQueue;
+
+typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
+ILPBURRPriorityQueue;
+} // end anonymous namespace
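A sketch of how a factory such as createBURRListDAGScheduler can wire these
typedefs together; the signatures match the declarations above, while the
actual factory bodies at the end of this file may differ in detail:

    ScheduleDAGSDNodes *
    llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
                                     CodeGenOpt::Level OptLevel) {
      const TargetMachine &TM = IS->TM;
      const TargetInstrInfo *TII = TM.getInstrInfo();
      const TargetRegisterInfo *TRI = TM.getRegisterInfo();

      // Plain Sethi-Ullman bottom-up queue: no register pressure tracking
      // (tracksrp == false) and therefore no TargetLowering needed.
      BURegReductionPriorityQueue *PQ =
        new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
      ScheduleDAGRRList *SD =
        new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
      PQ->setScheduleDAG(SD);  // the queue queries the DAG for orderings
      return SD;
    }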
+
+//===----------------------------------------------------------------------===//
+// Static Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
@@ -1045,413 +1563,283 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
if (SethiUllmanNumber == 0)
SethiUllmanNumber = 1;
-
+
return SethiUllmanNumber;
}
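For reference, the classic Sethi-Ullman recurrence that this routine adapts
(the visible tail above also clamps a result of zero up to one):

    //   SU(leaf) = 1
    //   SU(n)    = SU(l) == SU(r) ? SU(l) + 1 : max(SU(l), SU(r))
    // Worked example for (a*b) + (c*d):
    //   SU(a) = SU(b) = SU(c) = SU(d) = 1
    //   SU(a*b) = SU(c*d) = 2
    //   SU(+)   = 3   // three registers suffice with either product first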
-namespace {
- template<class SF>
- class RegReductionPriorityQueue : public SchedulingPriorityQueue {
- std::vector<SUnit*> Queue;
- SF Picker;
- unsigned CurQueueId;
- bool TracksRegPressure;
-
- protected:
- // SUnits - The SUnits for the current graph.
- std::vector<SUnit> *SUnits;
-
- MachineFunction &MF;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- const TargetLowering *TLI;
- ScheduleDAGRRList *scheduleDAG;
-
- // SethiUllmanNumbers - The SethiUllman number for each node.
- std::vector<unsigned> SethiUllmanNumbers;
-
- /// RegPressure - Tracking current reg pressure per register class.
- ///
- std::vector<unsigned> RegPressure;
-
- /// RegLimit - Tracking the number of allocatable registers per register
- /// class.
- std::vector<unsigned> RegLimit;
-
- public:
- RegReductionPriorityQueue(MachineFunction &mf,
- bool tracksrp,
- const TargetInstrInfo *tii,
- const TargetRegisterInfo *tri,
- const TargetLowering *tli)
- : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp),
- MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
- if (TracksRegPressure) {
- unsigned NumRC = TRI->getNumRegClasses();
- RegLimit.resize(NumRC);
- RegPressure.resize(NumRC);
- std::fill(RegLimit.begin(), RegLimit.end(), 0);
- std::fill(RegPressure.begin(), RegPressure.end(), 0);
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I)
- RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
- }
- }
-
- void initNodes(std::vector<SUnit> &sunits) {
- SUnits = &sunits;
- // Add pseudo dependency edges for two-address nodes.
- AddPseudoTwoAddrDeps();
- // Reroute edges to nodes with multiple uses.
- PrescheduleNodesWithMultipleUses();
- // Calculate node priorities.
- CalculateSethiUllmanNumbers();
- }
-
- void addNode(const SUnit *SU) {
- unsigned SUSize = SethiUllmanNumbers.size();
- if (SUnits->size() > SUSize)
- SethiUllmanNumbers.resize(SUSize*2, 0);
- CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
- }
-
- void updateNode(const SUnit *SU) {
- SethiUllmanNumbers[SU->NodeNum] = 0;
- CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
- }
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+void RegReductionPQBase::CalculateSethiUllmanNumbers() {
+ SethiUllmanNumbers.assign(SUnits->size(), 0);
- void releaseState() {
- SUnits = 0;
- SethiUllmanNumbers.clear();
- std::fill(RegPressure.begin(), RegPressure.end(), 0);
- }
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+ CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
+}
- unsigned getNodePriority(const SUnit *SU) const {
- assert(SU->NodeNum < SethiUllmanNumbers.size());
- unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
- if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
- // CopyToReg should be close to its uses to facilitate coalescing and
- // avoid spilling.
- return 0;
- if (Opc == TargetOpcode::EXTRACT_SUBREG ||
- Opc == TargetOpcode::SUBREG_TO_REG ||
- Opc == TargetOpcode::INSERT_SUBREG)
- // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
- // close to their uses to facilitate coalescing.
- return 0;
- if (SU->NumSuccs == 0 && SU->NumPreds != 0)
- // If SU does not have a register use, i.e. it doesn't produce a value
- // that would be consumed (e.g. store), then it terminates a chain of
- // computation. Give it a large SethiUllman number so it will be
- // scheduled right before its predecessors that it doesn't lengthen
- // their live ranges.
- return 0xffff;
- if (SU->NumPreds == 0 && SU->NumSuccs != 0)
- // If SU does not have a register def, schedule it close to its uses
- // because it does not lengthen any live ranges.
- return 0;
- return SethiUllmanNumbers[SU->NodeNum];
- }
+void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ // Add pseudo dependency edges for two-address nodes.
+ AddPseudoTwoAddrDeps();
+ // Reroute edges to nodes with multiple uses.
+ if (!TracksRegPressure)
+ PrescheduleNodesWithMultipleUses();
+ // Calculate node priorities.
+ CalculateSethiUllmanNumbers();
+}
- unsigned getNodeOrdering(const SUnit *SU) const {
- return scheduleDAG->DAG->GetOrdering(SU->getNode());
- }
+void RegReductionPQBase::addNode(const SUnit *SU) {
+ unsigned SUSize = SethiUllmanNumbers.size();
+ if (SUnits->size() > SUSize)
+ SethiUllmanNumbers.resize(SUSize*2, 0);
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
- bool empty() const { return Queue.empty(); }
-
- void push(SUnit *U) {
- assert(!U->NodeQueueId && "Node in the queue already");
- U->NodeQueueId = ++CurQueueId;
- Queue.push_back(U);
- }
+void RegReductionPQBase::updateNode(const SUnit *SU) {
+ SethiUllmanNumbers[SU->NodeNum] = 0;
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
- SUnit *pop() {
- if (empty()) return NULL;
- std::vector<SUnit *>::iterator Best = Queue.begin();
- for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
- E = Queue.end(); I != E; ++I)
- if (Picker(*Best, *I))
- Best = I;
- SUnit *V = *Best;
- if (Best != prior(Queue.end()))
- std::swap(*Best, Queue.back());
- Queue.pop_back();
- V->NodeQueueId = 0;
- return V;
- }
+// Lower priority means schedule further down. For bottom-up scheduling, lower
+// priority SUs are scheduled before higher priority SUs.
+unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
+ assert(SU->NodeNum < SethiUllmanNumbers.size());
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return 0;
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return 0;
+ if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+ // If SU does not have a register use, i.e. it doesn't produce a value
+ // that would be consumed (e.g. store), then it terminates a chain of
+ // computation. Give it a large SethiUllman number so it will be
+ // scheduled right before its predecessors that it doesn't lengthen
+ // their live ranges.
+ return 0xffff;
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return 0;
+ return SethiUllmanNumbers[SU->NodeNum];
+}
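
The priority ladder above is easiest to see in isolation. Below is a minimal,
self-contained sketch of the same decision tiers; MiniSU and its fields are
invented stand-ins for illustration, not the real SUnit API:

    // Sketch: special-case priorities feeding a bottom-up picker.
    #include <cstdio>
    #include <vector>

    struct MiniSU {
      unsigned NodeNum;
      unsigned NumPreds, NumSuccs;
      unsigned SethiUllman; // precomputed baseline priority
      bool IsCopyLike;      // CopyToReg / subreg-style node: keep near uses
    };

    static unsigned priority(const MiniSU &SU) {
      if (SU.IsCopyLike)
        return 0;                 // schedule close to uses
      if (SU.NumSuccs == 0 && SU.NumPreds != 0)
        return 0xffff;            // chain terminator: picked early (bottom-up)
      if (SU.NumPreds == 0 && SU.NumSuccs != 0)
        return 0;                 // pure def: lengthens no live range
      return SU.SethiUllman;
    }

    int main() {
      std::vector<MiniSU> SUs = {{0, 1, 0, 5, false},  // store-like terminator
                                 {1, 0, 1, 3, false},  // leaf def
                                 {2, 1, 1, 7, true}};  // copy-like
      for (const MiniSU &SU : SUs)
        std::printf("SU(%u) -> priority %u\n", SU.NodeNum, priority(SU));
      return 0;
    }
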
- void remove(SUnit *SU) {
- assert(!Queue.empty() && "Queue is empty!");
- assert(SU->NodeQueueId != 0 && "Not in queue!");
- std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
- SU);
- if (I != prior(Queue.end()))
- std::swap(*I, Queue.back());
- Queue.pop_back();
- SU->NodeQueueId = 0;
- }
+//===----------------------------------------------------------------------===//
+// Register Pressure Tracking
+//===----------------------------------------------------------------------===//
- bool HighRegPressure(const SUnit *SU) const {
- if (!TLI)
- return false;
+void RegReductionPQBase::dumpRegPressure() const {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ unsigned Id = RC->getID();
+ unsigned RP = RegPressure[Id];
+ if (!RP) continue;
+ DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
+ << '\n');
+ }
+}
- for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl())
- continue;
- SUnit *PredSU = I->getSUnit();
- const SDNode *PN = PredSU->getNode();
- if (!PN->isMachineOpcode()) {
- if (PN->getOpcode() == ISD::CopyFromReg) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
- if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
- return true;
- }
- continue;
- }
- unsigned POpc = PN->getMachineOpcode();
- if (POpc == TargetOpcode::IMPLICIT_DEF)
- continue;
- if (POpc == TargetOpcode::EXTRACT_SUBREG) {
- EVT VT = PN->getOperand(0).getValueType();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
- // Check if this increases register pressure of the specific register
- // class to the point where it would cause spills.
- if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
- return true;
- continue;
- } else if (POpc == TargetOpcode::INSERT_SUBREG ||
- POpc == TargetOpcode::SUBREG_TO_REG) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
- // Check if this increases register pressure of the specific register
- // class to the point where it would cause spills.
- if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
- return true;
- continue;
- }
- unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
- for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = PN->getValueType(i);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- if (RegPressure[RCId] >= RegLimit[RCId])
- return true; // Reg pressure already high.
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
- if (!PN->hasAnyUseOfValue(i))
- continue;
- // Check if this increases register pressure of the specific register
- // class to the point where it would cause spills.
- if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
- return true;
- }
- }
+bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
+ if (!TLI)
+ return false;
- return false;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ EVT VT = RegDefPos.GetValue();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned Cost = TLI->getRepRegClassCostFor(VT);
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+ return true;
}
+ }
+ return false;
+}
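
The test above reduces to: would one more def of this register class reach the
class limit? A minimal sketch under invented types (class ids as vector
indices, costs in abstract units):

    #include <vector>

    struct PressureState {
      std::vector<unsigned> Pressure; // units currently live, per class
      std::vector<unsigned> Limit;    // target-provided limit, per class
    };

    // Same (RegPressure + Cost) >= RegLimit comparison as HighRegPressure.
    static bool wouldExceed(const PressureState &PS, unsigned RCId,
                            unsigned Cost) {
      return PS.Pressure[RCId] + Cost >= PS.Limit[RCId];
    }

    int main() {
      PressureState PS{{6, 2}, {8, 4}};
      return wouldExceed(PS, /*RCId=*/0, /*Cost=*/2) ? 0 : 1; // 6+2 >= 8
    }
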
- void ScheduledNode(SUnit *SU) {
- if (!TracksRegPressure)
- return;
-
- const SDNode *N = SU->getNode();
- if (!N->isMachineOpcode()) {
- if (N->getOpcode() != ISD::CopyToReg)
- return;
- } else {
- unsigned Opc = N->getMachineOpcode();
- if (Opc == TargetOpcode::EXTRACT_SUBREG ||
- Opc == TargetOpcode::INSERT_SUBREG ||
- Opc == TargetOpcode::SUBREG_TO_REG ||
- Opc == TargetOpcode::REG_SEQUENCE ||
- Opc == TargetOpcode::IMPLICIT_DEF)
- return;
- }
+bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) {
+ const SDNode *N = SU->getNode();
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl())
- continue;
- SUnit *PredSU = I->getSUnit();
- if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
- continue;
- const SDNode *PN = PredSU->getNode();
- if (!PN->isMachineOpcode()) {
- if (PN->getOpcode() == ISD::CopyFromReg) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- }
- continue;
- }
- unsigned POpc = PN->getMachineOpcode();
- if (POpc == TargetOpcode::IMPLICIT_DEF)
- continue;
- if (POpc == TargetOpcode::EXTRACT_SUBREG) {
- EVT VT = PN->getOperand(0).getValueType();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- continue;
- } else if (POpc == TargetOpcode::INSERT_SUBREG ||
- POpc == TargetOpcode::SUBREG_TO_REG) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- continue;
- }
- unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
- for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = PN->getValueType(i);
- if (!PN->hasAnyUseOfValue(i))
- continue;
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- }
- }
+ if (!N->isMachineOpcode() || !SU->NumSuccs)
+ return false;
- // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
- // may transfer data dependencies to CopyToReg.
- if (SU->NumSuccs && N->isMachineOpcode()) {
- unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
- for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = N->getValueType(i);
- if (!N->hasAnyUseOfValue(i))
- continue;
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
- // Register pressure tracking is imprecise. This can happen.
- RegPressure[RCId] = 0;
- else
- RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
- }
- }
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = N->getValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ return true;
+ }
+ return false;
+}
+
+void RegReductionPQBase::ScheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
- dumpRegPressure();
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ // FIXME: The ScheduleDAG currently loses information about which of a
+ // node's values is consumed by each dependence. Consequently, if the node
+ // defines multiple register classes, we don't know which to pressurize
+ // here. Instead the following loop consumes the register defs in an
+ // arbitrary order. At least it handles the common case of clustered loads
+ // to the same class. For precise liveness, each SDep needs to indicate the
+ // result number. But that tightly couples the ScheduleDAG with the
+ // SelectionDAG making updates tricky. A simpler hack would be to attach a
+ // value type or register class to SDep.
+ //
+ // The most important aspect of register tracking is balancing the increase
+ // here with the reduction further below. Note that this SU may use multiple
+ // defs in PredSU. They can't be determined here, but we've already
+ // compensated by reducing NumRegDefsLeft in PredSU during
+ // ScheduleDAGSDNodes::AddSchedEdges.
+ --PredSU->NumRegDefsLeft;
+ unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs)
+ continue;
+ EVT VT = RegDefPos.GetValue();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ break;
}
+ }
- void UnscheduledNode(SUnit *SU) {
- if (!TracksRegPressure)
- return;
-
- const SDNode *N = SU->getNode();
- if (!N->isMachineOpcode()) {
- if (N->getOpcode() != ISD::CopyToReg)
- return;
- } else {
- unsigned Opc = N->getMachineOpcode();
- if (Opc == TargetOpcode::EXTRACT_SUBREG ||
- Opc == TargetOpcode::INSERT_SUBREG ||
- Opc == TargetOpcode::SUBREG_TO_REG ||
- Opc == TargetOpcode::REG_SEQUENCE ||
- Opc == TargetOpcode::IMPLICIT_DEF)
- return;
- }
+ // We should have this assert, but there may be dead SDNodes that never
+ // materialize as SUnits, so they don't appear to generate liveness.
+ //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
+ int SkipRegDefs = (int)SU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs > 0)
+ continue;
+ EVT VT = RegDefPos.GetValue();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) {
+ // Register pressure tracking is imprecise. This can happen. But we try
+ // hard not to let it happen because it likely results in poor scheduling.
+ DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
+ RegPressure[RCId] = 0;
+ } else {
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ }
+ }
+ dumpRegPressure();
+}
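
The bookkeeping is deliberately symmetric: scheduling a use makes one
predecessor def live (pressure up); scheduling the defining node retires its
live defs (pressure down, clamped at zero because tracking is imprecise). A
toy one-class simulation of that round trip, all numbers invented:

    #include <cstdio>

    int main() {
      unsigned RegPressure = 0;
      unsigned PredNumRegDefsLeft = 2; // pred defines two used values

      // Bottom-up: scheduling a use makes one pred def live.
      --PredNumRegDefsLeft;
      RegPressure += 1; // cost of that def's register class

      // Later, scheduling the defining node retires the live def.
      unsigned Cost = 1;
      RegPressure = (RegPressure < Cost) ? 0 : RegPressure - Cost;
      std::printf("pressure=%u defsLeft=%u\n", RegPressure,
                  PredNumRegDefsLeft); // pressure=0 defsLeft=1
      return 0;
    }
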
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl())
- continue;
- SUnit *PredSU = I->getSUnit();
- if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
- continue;
- const SDNode *PN = PredSU->getNode();
- if (!PN->isMachineOpcode()) {
- if (PN->getOpcode() == ISD::CopyFromReg) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- }
- continue;
- }
- unsigned POpc = PN->getMachineOpcode();
- if (POpc == TargetOpcode::IMPLICIT_DEF)
- continue;
- if (POpc == TargetOpcode::EXTRACT_SUBREG) {
- EVT VT = PN->getOperand(0).getValueType();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- continue;
- } else if (POpc == TargetOpcode::INSERT_SUBREG ||
- POpc == TargetOpcode::SUBREG_TO_REG) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- continue;
- }
- unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
- for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = PN->getValueType(i);
- if (!PN->hasAnyUseOfValue(i))
- continue;
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
- // Register pressure tracking is imprecise. This can happen.
- RegPressure[RCId] = 0;
- else
- RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
- }
- }
+void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ const SDNode *N = SU->getNode();
+ if (!N->isMachineOpcode()) {
+ if (N->getOpcode() != ISD::CopyToReg)
+ return;
+ } else {
+ unsigned Opc = N->getMachineOpcode();
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::REG_SEQUENCE ||
+ Opc == TargetOpcode::IMPLICIT_DEF)
+ return;
+ }
- // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
- // may transfer data dependencies to CopyToReg.
- if (SU->NumSuccs && N->isMachineOpcode()) {
- unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
- for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
- EVT VT = N->getValueType(i);
- if (VT == MVT::Flag || VT == MVT::Other)
- continue;
- if (!N->hasAnyUseOfValue(i))
- continue;
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- }
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
+ // counts data deps.
+ if (PredSU->NumSuccsLeft != PredSU->Succs.size())
+ continue;
+ const SDNode *PN = PredSU->getNode();
+ if (!PN->isMachineOpcode()) {
+ if (PN->getOpcode() == ISD::CopyFromReg) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
}
-
- dumpRegPressure();
+ continue;
}
-
- void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
- scheduleDAG = scheduleDag;
+ unsigned POpc = PN->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF)
+ continue;
+ if (POpc == TargetOpcode::EXTRACT_SUBREG) {
+ EVT VT = PN->getOperand(0).getValueType();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
+ } else if (POpc == TargetOpcode::INSERT_SUBREG ||
+ POpc == TargetOpcode::SUBREG_TO_REG) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
}
-
- void dumpRegPressure() const {
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I) {
- const TargetRegisterClass *RC = *I;
- unsigned Id = RC->getID();
- unsigned RP = RegPressure[Id];
- if (!RP) continue;
- DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
- << '\n');
- }
+ unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = PN->getValueType(i);
+ if (!PN->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
+ // Register pressure tracking is imprecise. This can happen.
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
}
+ }
- protected:
- bool canClobber(const SUnit *SU, const SUnit *Op);
- void AddPseudoTwoAddrDeps();
- void PrescheduleNodesWithMultipleUses();
- void CalculateSethiUllmanNumbers();
- };
-
- typedef RegReductionPriorityQueue<bu_ls_rr_sort>
- BURegReductionPriorityQueue;
-
- typedef RegReductionPriorityQueue<td_ls_rr_sort>
- TDRegReductionPriorityQueue;
-
- typedef RegReductionPriorityQueue<src_ls_rr_sort>
- SrcRegReductionPriorityQueue;
-
- typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
- HybridBURRPriorityQueue;
+ // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
+ // may transfer data dependencies to CopyToReg.
+ if (SU->NumSuccs && N->isMachineOpcode()) {
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ }
- typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
- ILPBURRPriorityQueue;
+ dumpRegPressure();
}
+//===----------------------------------------------------------------------===//
+// Dynamic Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
@@ -1483,9 +1871,123 @@ static unsigned calcMaxScratches(const SUnit *SU) {
return Scratches;
}
-template <typename RRSort>
-static bool BURRSort(const SUnit *left, const SUnit *right,
- const RegReductionPriorityQueue<RRSort> *SPQ) {
+/// hasOnlyLiveOutUses - Return true if SU's only value successors are
+/// CopyToReg nodes targeting virtual registers. Such a def is probably a
+/// liveout with no other use; it should be scheduled closer to the terminator.
+static bool hasOnlyLiveOutUses(const SUnit *SU) {
+ bool RetVal = false;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ const SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
+ unsigned Reg =
+ cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
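
Note the all-or-nothing shape: a single non-copy data use returns false
immediately, so the bonus only applies when every value successor is a
virtual-register copy. An equivalent sketch over an invented Use record:

    #include <vector>

    struct Use { bool IsCopyToVirtReg; }; // "CopyToReg of a virtual register"

    static bool onlyLiveOutUses(const std::vector<Use> &Uses) {
      bool SawOne = false;
      for (const Use &U : Uses) {
        if (!U.IsCopyToVirtReg)
          return false; // any other data use disqualifies the def
        SawOne = true;
      }
      return SawOne;
    }

    int main() {
      std::vector<Use> Uses = {{true}, {true}};
      return onlyLiveOutUses(Uses) ? 0 : 1; // 0: all uses are liveout copies
    }
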
+
+/// UnitsSharePred - Return true if the two scheduling units share a common
+/// data predecessor.
+static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
+ SmallSet<const SUnit*, 4> Preds;
+ for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ Preds.insert(I->getSUnit());
+ }
+ for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (Preds.count(I->getSUnit()))
+ return true;
+ }
+ return false;
+}
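
The helper is a plain set-intersection probe over data predecessors. The same
idea with standard containers (MiniSU is an invented stand-in for SUnit):

    #include <unordered_set>
    #include <vector>

    struct MiniSU { std::vector<const MiniSU*> Preds; };

    static bool sharePred(const MiniSU &L, const MiniSU &R) {
      std::unordered_set<const MiniSU*> Seen(L.Preds.begin(), L.Preds.end());
      for (const MiniSU *P : R.Preds)
        if (Seen.count(P))
          return true;
      return false;
    }

    int main() {
      MiniSU A, B, C;
      B.Preds = {&A};
      C.Preds = {&A};
      return sharePred(B, C) ? 0 : 1; // 0: both use A
    }
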
+
+// Check for either a dependence (latency) or resource (hazard) stall.
+//
+// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
+static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
+ if ((int)SPQ->getCurCycle() < Height) return true;
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return true;
+ return false;
+}
+
+// Return -1 if left has higher priority, 1 if right has higher priority.
+// Return 0 if latency-based priority is equivalent.
+static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
+ RegReductionPQBase *SPQ) {
+ // If the two nodes share an operand and one of them has a single
+ // use that is a live out copy, favor the one that is live out. Otherwise
+ // it will be difficult to eliminate the copy if the instruction is a
+ // loop induction variable update. e.g.
+ // BB:
+ // sub r1, r3, #1
+ // str r0, [r2, r3]
+ // mov r3, r1
+ // cmp
+ // bne BB
+ bool SharePred = UnitsSharePred(left, right);
+ // FIXME: Only adjust if BB is a loop back edge.
+ // FIXME: What's the cost of a copy?
+ int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
+ int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
+ int LHeight = (int)left->getHeight() - LBonus;
+ int RHeight = (int)right->getHeight() - RBonus;
+
+ bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
+ BUHasStall(left, LHeight, SPQ);
+ bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
+ BUHasStall(right, RHeight, SPQ);
+
+ // If scheduling one of the nodes will cause a pipeline stall, delay it.
+ // If scheduling either of the nodes will cause a pipeline stall, sort
+ // them according to their height.
+ if (LStall) {
+ if (!RStall)
+ return 1;
+ if (LHeight != RHeight)
+ return LHeight > RHeight ? 1 : -1;
+ } else if (RStall)
+ return -1;
+
+ // If either node is scheduling for latency, sort them by height/depth
+ // and latency.
+ if (!checkPref || (left->SchedulingPref == Sched::Latency ||
+ right->SchedulingPref == Sched::Latency)) {
+ if (DisableSchedCycles) {
+ if (LHeight != RHeight)
+ return LHeight > RHeight ? 1 : -1;
+ } else {
+ // If neither instruction stalls (!LStall && !RStall) then the height
+ // is already covered, so only depth matters. We also reach this if
+ // both stall but have the same height.
+ unsigned LDepth = left->getDepth();
+ unsigned RDepth = right->getDepth();
+ if (LDepth != RDepth) {
+ DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
+ << ") depth " << LDepth << " vs SU (" << right->NodeNum
+ << ") depth " << RDepth << "\n");
+ return LDepth < RDepth ? 1 : -1;
+ }
+ }
+ if (left->Latency != right->Latency)
+ return left->Latency > right->Latency ? 1 : -1;
+ }
+ return 0;
+}
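
Stripped of the hazard recognizer and the DisableSchedCycles plumbing, the
comparison is a tiered ladder: stalls first, height among stallers, then
depth, then latency. A simplified sketch with invented fields:

    #include <cstdio>

    struct MiniSU { int Height, Depth, Latency; bool Stalls; };

    // -1: left has higher priority, 1: right, 0: tie.
    static int compareLatency(const MiniSU &L, const MiniSU &R) {
      if (L.Stalls) {
        if (!R.Stalls) return 1;          // delay the staller
        if (L.Height != R.Height) return L.Height > R.Height ? 1 : -1;
      } else if (R.Stalls)
        return -1;
      if (L.Depth != R.Depth) return L.Depth < R.Depth ? 1 : -1;
      if (L.Latency != R.Latency) return L.Latency > R.Latency ? 1 : -1;
      return 0;
    }

    int main() {
      MiniSU L = {4, 2, 1, false}, R = {3, 5, 1, false};
      std::printf("%d\n", compareLatency(L, R)); // 1: deeper R has priority
      return 0;
    }
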
+
+static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
unsigned LPriority = SPQ->getNodePriority(left);
unsigned RPriority = SPQ->getNodePriority(right);
if (LPriority != RPriority)
@@ -1519,24 +2021,31 @@ static bool BURRSort(const SUnit *left, const SUnit *right,
if (LScratch != RScratch)
return LScratch > RScratch;
- if (left->getHeight() != right->getHeight())
- return left->getHeight() > right->getHeight();
-
- if (left->getDepth() != right->getDepth())
- return left->getDepth() < right->getDepth();
+ if (!DisableSchedCycles) {
+ int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
+ } else {
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
- assert(left->NodeQueueId && right->NodeQueueId &&
+ if (left->getDepth() != right->getDepth())
+ return left->getDepth() < right->getDepth();
+ }
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
return (left->NodeQueueId > right->NodeQueueId);
}
// Bottom up
-bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
return BURRSort(left, right, SPQ);
}
// Source order, otherwise bottom up.
-bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
unsigned LOrder = SPQ->getNodeOrdering(left);
unsigned ROrder = SPQ->getNodeOrdering(right);
@@ -1548,49 +2057,69 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
return BURRSort(left, right, SPQ);
}
-bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
+// If the time between now and when the instruction will be ready can cover
+// the spill code, then avoid adding it to the ready queue. This gives long
+// stalls highest priority and allows hoisting across calls. It should also
+// speed up processing the available queue.
+bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ static const unsigned ReadyDelay = 3;
+
+ if (SPQ->MayReduceRegPressure(SU)) return true;
+
+ if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
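
The gate reads: pressure relief is always welcome; otherwise a node whose
height exceeds CurCycle + ReadyDelay can wait, since the stall will hide its
spill code anyway. A sketch with the hazard query reduced to a boolean (all
inputs invented):

    struct MiniSU { unsigned Height; bool MayReducePressure; };

    static const unsigned ReadyDelay = 3; // mirrors the constant above

    static bool isReady(const MiniSU &SU, unsigned CurCycle, bool Hazard) {
      if (SU.MayReducePressure) return true;
      if (SU.Height > CurCycle + ReadyDelay) return false;
      return !Hazard;
    }

    int main() {
      MiniSU SU = {7, false};
      return isReady(SU, 2, false) ? 1 : 0; // 0: 7 > 2 + 3, keep it waiting
    }
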
+
+// Return true if right should be scheduled with higher priority than left.
+bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
bool LHigh = SPQ->HighRegPressure(left);
bool RHigh = SPQ->HighRegPressure(right);
// Avoid causing spills. If register pressure is high, schedule for
// register pressure reduction.
- if (LHigh && !RHigh)
+ if (LHigh && !RHigh) {
+ DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
+ << right->NodeNum << ")\n");
return true;
- else if (!LHigh && RHigh)
+ }
+ else if (!LHigh && RHigh) {
+ DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
+ << left->NodeNum << ")\n");
return false;
+ }
else if (!LHigh && !RHigh) {
- // Low register pressure situation, schedule for latency if possible.
- bool LStall = left->SchedulingPref == Sched::Latency &&
- SPQ->getCurCycle() < left->getHeight();
- bool RStall = right->SchedulingPref == Sched::Latency &&
- SPQ->getCurCycle() < right->getHeight();
- // If scheduling one of the node will cause a pipeline stall, delay it.
- // If scheduling either one of the node will cause a pipeline stall, sort
- // them according to their height.
- // If neither will cause a pipeline stall, try to reduce register pressure.
- if (LStall) {
- if (!RStall)
- return true;
- if (left->getHeight() != right->getHeight())
- return left->getHeight() > right->getHeight();
- } else if (RStall)
- return false;
-
- // If either node is scheduling for latency, sort them by height and latency
- // first.
- if (left->SchedulingPref == Sched::Latency ||
- right->SchedulingPref == Sched::Latency) {
- if (left->getHeight() != right->getHeight())
- return left->getHeight() > right->getHeight();
- if (left->Latency != right->Latency)
- return left->Latency > right->Latency;
- }
+ int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
}
-
return BURRSort(left, right, SPQ);
}
-bool ilp_ls_rr_sort::operator()(const SUnit *left,
- const SUnit *right) const {
+// Schedule as many instructions in each cycle as possible. So don't make an
+// instruction available unless it is ready in the current cycle.
+bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ if (SU->getHeight() > CurCycle) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true; // height <= CurCycle is guaranteed by the check above
+}
+
+bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
bool LHigh = SPQ->HighRegPressure(left);
bool RHigh = SPQ->HighRegPressure(right);
// Avoid causing spills. If register pressure is high, schedule for
@@ -1611,9 +2140,11 @@ bool ilp_ls_rr_sort::operator()(const SUnit *left,
return BURRSort(left, right, SPQ);
}
-template<class SF>
-bool
-RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
+//===----------------------------------------------------------------------===//
+// Preschedule for Register Pressure
+//===----------------------------------------------------------------------===//
+
+bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
if (SU->isTwoAddress) {
unsigned Opc = SU->getNode()->getMachineOpcode();
const TargetInstrDesc &TID = TII->get(Opc);
@@ -1631,19 +2162,6 @@ RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
return false;
}
-/// hasCopyToRegUse - Return true if SU has a value successor that is a
-/// CopyToReg node.
-static bool hasCopyToRegUse(const SUnit *SU) {
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (I->isCtrl()) continue;
- const SUnit *SuccSU = I->getSUnit();
- if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
- return true;
- }
- return false;
-}
-
/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
@@ -1654,7 +2172,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
assert(ImpDefs && "Caller should check hasPhysRegDefs");
for (const SDNode *SUNode = SU->getNode(); SUNode;
- SUNode = SUNode->getFlaggedNode()) {
+ SUNode = SUNode->getGluedNode()) {
if (!SUNode->isMachineOpcode())
continue;
const unsigned *SUImpDefs =
@@ -1663,7 +2181,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
return false;
for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
- if (VT == MVT::Flag || VT == MVT::Other)
+ if (VT == MVT::Glue || VT == MVT::Other)
continue;
if (!N->hasAnyUseOfValue(i))
continue;
@@ -1709,8 +2227,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
/// after N, which shortens the U->N live range, reducing
/// register pressure.
///
-template<class SF>
-void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
+void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// Visit all the nodes in topological order, working top-down.
for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
SUnit *SU = &(*SUnits)[i];
@@ -1748,7 +2265,7 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
if (PredSU->NumSuccs == 1)
continue;
// Avoid prescheduling to copies from virtual registers, which don't behave
- // like other nodes from the perspective of scheduling // heuristics.
+ // like other nodes from the perspective of scheduling heuristics.
if (SDNode *N = SU->getNode())
if (N->getOpcode() == ISD::CopyFromReg &&
TargetRegisterInfo::isVirtualRegister
@@ -1802,17 +2319,17 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
-template<class SF>
-void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
+void RegReductionPQBase::AddPseudoTwoAddrDeps() {
for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
SUnit *SU = &(*SUnits)[i];
if (!SU->isTwoAddress)
continue;
SDNode *Node = SU->getNode();
- if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
+ if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
continue;
+ bool isLiveOut = hasOnlyLiveOutUses(SU);
unsigned Opc = Node->getMachineOpcode();
const TargetInstrDesc &TID = TII->get(Opc);
unsigned NumRes = TID.getNumDefs();
@@ -1862,7 +2379,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
SuccOpc == TargetOpcode::SUBREG_TO_REG)
continue;
if ((!canClobber(SuccSU, DUSU) ||
- (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
+ (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
(!SU->isCommutable && SuccSU->isCommutable)) &&
!scheduleDAG->IsReachable(SuccSU, SU)) {
DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
@@ -1877,20 +2394,10 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
}
}
-/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
-/// scheduling units.
-template<class SF>
-void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
- SethiUllmanNumbers.assign(SUnits->size(), 0);
-
- for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
- CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
-}
-
/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
/// predecessors of the successors of the SUnit SU. Stop when the provided
/// limit is exceeded.
-static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
+static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
unsigned Limit) {
unsigned Sum = 0;
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
@@ -1942,7 +2449,7 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
if (left->NumSuccsLeft != right->NumSuccsLeft)
return left->NumSuccsLeft > right->NumSuccsLeft;
- assert(left->NodeQueueId && right->NodeQueueId &&
+ assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
return (left->NodeQueueId > right->NodeQueueId);
}
@@ -1952,68 +2459,74 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
//===----------------------------------------------------------------------===//
llvm::ScheduleDAGSDNodes *
-llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-
+
BURegReductionPriorityQueue *PQ =
new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
- return SD;
+ return SD;
}
llvm::ScheduleDAGSDNodes *
-llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-
+
TDRegReductionPriorityQueue *PQ =
new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
return SD;
}
llvm::ScheduleDAGSDNodes *
-llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-
+
SrcRegReductionPriorityQueue *PQ =
new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
- return SD;
+ return SD;
}
llvm::ScheduleDAGSDNodes *
-llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
const TargetLowering *TLI = &IS->getTargetLowering();
-
+
HybridBURRPriorityQueue *PQ =
new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
+
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
PQ->setScheduleDAG(SD);
- return SD;
+ return SD;
}
llvm::ScheduleDAGSDNodes *
-llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
const TargetLowering *TLI = &IS->getTargetLowering();
-
+
ILPBURRPriorityQueue *PQ =
new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
PQ->setScheduleDAG(SD);
- return SD;
+ return SD;
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index f1bf82a..477c1ff 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -34,8 +34,8 @@ using namespace llvm;
STATISTIC(LoadsClustered, "Number of loads clustered together");
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
- : ScheduleDAG(mf) {
-}
+ : ScheduleDAG(mf),
+ InstrItins(mf.getTarget().getInstrItineraryData()) {}
/// Run - perform scheduling.
///
@@ -72,6 +72,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
SUnit *SU = NewSUnit(Old->getNode());
SU->OrigNode = Old->OrigNode;
SU->Latency = Old->Latency;
+ SU->isCall = Old->isCall;
SU->isTwoAddress = Old->isTwoAddress;
SU->isCommutable = Old->isCommutable;
SU->hasPhysRegDefs = Old->hasPhysRegDefs;
@@ -85,7 +86,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
/// a specified operand is a physical register dependency. If so, returns the
/// register and the cost of copying the register.
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
- const TargetRegisterInfo *TRI,
+ const TargetRegisterInfo *TRI,
const TargetInstrInfo *TII,
unsigned &PhysReg, int &Cost) {
if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
@@ -108,29 +109,28 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
}
}
-static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
- SelectionDAG *DAG) {
+static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
SmallVector<EVT, 4> VTs;
- SDNode *FlagDestNode = Flag.getNode();
+ SDNode *GlueDestNode = Glue.getNode();
- // Don't add a flag from a node to itself.
- if (FlagDestNode == N) return;
+ // Don't add glue from a node to itself.
+ if (GlueDestNode == N) return;
- // Don't add a flag to something which already has a flag.
- if (N->getValueType(N->getNumValues() - 1) == MVT::Flag) return;
+ // Don't add glue to something which already has glue.
+ if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return;
for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
VTs.push_back(N->getValueType(I));
- if (AddFlag)
- VTs.push_back(MVT::Flag);
+ if (AddGlue)
+ VTs.push_back(MVT::Glue);
SmallVector<SDValue, 4> Ops;
for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
Ops.push_back(N->getOperand(I));
- if (FlagDestNode)
- Ops.push_back(Flag);
+ if (GlueDestNode)
+ Ops.push_back(Glue);
SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
MachineSDNode::mmo_iterator Begin = 0, End = 0;
@@ -149,9 +149,9 @@ static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
MN->setMemRefs(Begin, End);
}
-/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them.
+/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
/// This function finds loads of the same base and different offsets. If the
-/// offsets are not far apart (target specific), it add MVT::Flag inputs and
+/// offsets are not far apart (target specific), it adds MVT::Glue inputs and
/// outputs to ensure they are scheduled together and in order. This
/// optimization may benefit some targets by improving cache locality.
void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
@@ -213,20 +213,20 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
if (NumLoads == 0)
return;
- // Cluster loads by adding MVT::Flag outputs and inputs. This also
+ // Cluster loads by adding MVT::Glue outputs and inputs. This also
// ensures they are scheduled in order of increasing addresses.
SDNode *Lead = Loads[0];
- AddFlags(Lead, SDValue(0, 0), true, DAG);
+ AddGlue(Lead, SDValue(0, 0), true, DAG);
- SDValue InFlag = SDValue(Lead, Lead->getNumValues() - 1);
+ SDValue InGlue = SDValue(Lead, Lead->getNumValues() - 1);
for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
- bool OutFlag = I < E - 1;
+ bool OutGlue = I < E - 1;
SDNode *Load = Loads[I];
- AddFlags(Load, InFlag, OutFlag, DAG);
+ AddGlue(Load, InGlue, OutGlue, DAG);
- if (OutFlag)
- InFlag = SDValue(Load, Load->getNumValues() - 1);
+ if (OutGlue)
+ InGlue = SDValue(Load, Load->getNumValues() - 1);
++LoadsClustered;
}
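
The clustering loop is a simple chain construction: the lead load gets a glue
output, and each later load consumes its predecessor's glue and produces glue
of its own except the last. A sketch over an invented Node type (the real
code calls AddGlue and re-reads each load's last value):

    #include <cstddef>
    #include <vector>

    struct Node { const Node *GlueIn = nullptr; bool GlueOut = false; };

    static void cluster(std::vector<Node> &Loads) {
      const Node *InGlue = nullptr;
      for (std::size_t I = 0; I < Loads.size(); ++I) {
        bool OutGlue = I + 1 < Loads.size(); // last load emits no glue
        Loads[I].GlueIn = InGlue;            // orders it after its predecessor
        Loads[I].GlueOut = OutGlue;
        if (OutGlue)
          InGlue = &Loads[I];
      }
    }

    int main() {
      std::vector<Node> Loads(3);
      cluster(Loads);
      return (Loads[0].GlueOut && !Loads[2].GlueOut) ? 0 : 1;
    }
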
@@ -266,68 +266,75 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// FIXME: Multiply by 2 because we may clone nodes during scheduling.
// This is a temporary workaround.
SUnits.reserve(NumNodes * 2);
-
+
// Add all nodes in depth first order.
SmallVector<SDNode*, 64> Worklist;
SmallPtrSet<SDNode*, 64> Visited;
Worklist.push_back(DAG->getRoot().getNode());
Visited.insert(DAG->getRoot().getNode());
-
+
while (!Worklist.empty()) {
SDNode *NI = Worklist.pop_back_val();
-
+
// Add all operands to the worklist unless they've already been added.
for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
if (Visited.insert(NI->getOperand(i).getNode()))
Worklist.push_back(NI->getOperand(i).getNode());
-
+
if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
continue;
-
+
// If this node has already been processed, stop now.
if (NI->getNodeId() != -1) continue;
-
+
SUnit *NodeSUnit = NewSUnit(NI);
-
- // See if anything is flagged to this node, if so, add them to flagged
- // nodes. Nodes can have at most one flag input and one flag output. Flags
- // are required to be the last operand and result of a node.
-
- // Scan up to find flagged preds.
+
+ // See if anything is glued to this node, if so, add them to glued
+ // nodes. Nodes can have at most one glue input and one glue output. Glue
+ // is required to be the last operand and result of a node.
+
+ // Scan up to find glued preds.
SDNode *N = NI;
while (N->getNumOperands() &&
- N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
N = N->getOperand(N->getNumOperands()-1).getNode();
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
}
-
- // Scan down to find any flagged succs.
+
+ // Scan down to find any glued succs.
N = NI;
- while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
- SDValue FlagVal(N, N->getNumValues()-1);
-
- // There are either zero or one users of the Flag result.
- bool HasFlagUse = false;
- for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
+ SDValue GlueVal(N, N->getNumValues()-1);
+
+ // There are either zero or one users of the Glue result.
+ bool HasGlueUse = false;
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
UI != E; ++UI)
- if (FlagVal.isOperandOf(*UI)) {
- HasFlagUse = true;
+ if (GlueVal.isOperandOf(*UI)) {
+ HasGlueUse = true;
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
N = *UI;
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
break;
}
- if (!HasFlagUse) break;
+ if (!HasGlueUse) break;
}
-
- // If there are flag operands involved, N is now the bottom-most node
- // of the sequence of nodes that are flagged together.
+
+ // If there are glue operands involved, N is now the bottom-most node
+ // of the sequence of nodes that are glued together.
// Update the SUnit.
NodeSUnit->setNode(N);
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
+ // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
+ InitNumRegDefsLeft(NodeSUnit);
+
// Assign the Latency field of NodeSUnit using target-provided information.
ComputeLatency(NodeSUnit);
}
@@ -343,7 +350,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
SUnit *SU = &SUnits[su];
SDNode *MainNode = SU->getNode();
-
+
if (MainNode->isMachineOpcode()) {
unsigned Opc = MainNode->getMachineOpcode();
const TargetInstrDesc &TID = TII->get(Opc);
@@ -356,9 +363,9 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
if (TID.isCommutable())
SU->isCommutable = true;
}
-
+
// Find all predecessors and successors of the group.
- for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) {
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
if (N->isMachineOpcode() &&
TII->get(N->getMachineOpcode()).getImplicitDefs()) {
SU->hasPhysRegClobbers = true;
@@ -368,7 +375,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
SU->hasPhysRegDefs = true;
}
-
+
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDNode *OpN = N->getOperand(i).getNode();
if (isPassiveNode(OpN)) continue; // Not scheduled.
@@ -377,7 +384,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
if (OpSU == SU) continue; // In the same group.
EVT OpVT = N->getOperand(i).getValueType();
- assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
+ assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
bool isChain = OpVT == MVT::Other;
unsigned PhysReg = 0;
@@ -403,7 +410,13 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
}
- SU->addPred(dep);
+ if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 0) {
+ // Multiple register uses are combined in the same SUnit. For example,
+ // we could have a set of glued nodes with all their defs consumed by
+ // another set of glued nodes. Register pressure tracking sees this as
+ // a single use, so to keep pressure balanced we reduce the defs.
+ --OpSU->NumRegDefsLeft;
+ }
}
}
}
@@ -412,7 +425,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
/// BuildSchedGraph - Build the SUnit graph from the selection dag that we
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
-/// flagged together nodes with a single SUnit.
+/// glued together nodes with a single SUnit.
void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
// Cluster certain nodes which should be scheduled together.
ClusterNodes();
@@ -422,6 +435,69 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
AddSchedEdges();
}
+// Initialize NumNodeDefs for the current Node's opcode.
+void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
+ DefIdx = 0; // reset for every node in the glued chain, not only machine ops
+ if (!Node->isMachineOpcode()) {
+ if (Node->getOpcode() == ISD::CopyFromReg)
+ NodeNumDefs = 1;
+ else
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned POpc = Node->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF) {
+ // No register need be allocated for this.
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
+ // Some instructions define regs that are not represented in the selection DAG
+ // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
+ NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
+}
+
+// Construct a RegDefIter for this SUnit and find the first valid value.
+ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
+ const ScheduleDAGSDNodes *SD)
+ : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
+ InitNodeNumDefs();
+ Advance();
+}
+
+// Advance to the next valid value defined by the SUnit.
+void ScheduleDAGSDNodes::RegDefIter::Advance() {
+ for (;Node;) { // Visit all glued nodes.
+ for (;DefIdx < NodeNumDefs; ++DefIdx) {
+ if (!Node->hasAnyUseOfValue(DefIdx))
+ continue;
+ if (Node->isMachineOpcode() &&
+ Node->getMachineOpcode() == TargetOpcode::EXTRACT_SUBREG) {
+ // Propagate the incoming (full-register) type. I doubt it's needed.
+ ValueType = Node->getOperand(0).getValueType();
+ }
+ else {
+ ValueType = Node->getValueType(DefIdx);
+ }
+ ++DefIdx;
+ return; // Found a normal regdef.
+ }
+ Node = Node->getGluedNode();
+ if (Node == NULL) {
+ return; // No values left to visit.
+ }
+ InitNodeNumDefs();
+ }
+}
+
+void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
+ assert(SU->NumRegDefsLeft == 0 && "expect a new node");
+ for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
+ assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
+ ++SU->NumRegDefsLeft;
+ }
+}
+
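
The iterator pattern is worth seeing end to end: scan one node's used defs,
then hop to the next glued node and rescan. A self-contained usage sketch
with an invented MiniNode chain, counting defs the way InitNumRegDefsLeft
does:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct MiniNode {
      std::vector<bool> DefHasUse;  // models hasAnyUseOfValue per value
      const MiniNode *Glued;        // next node in the glued chain
    };

    struct DefIter {
      const MiniNode *N;
      std::size_t Idx;
      explicit DefIter(const MiniNode *Start) : N(Start), Idx(0) { advance(); }
      bool valid() const { return N != 0; }
      void advance() {
        while (N) {
          for (; Idx < N->DefHasUse.size(); ++Idx)
            if (N->DefHasUse[Idx]) { ++Idx; return; } // stop on a used def
          N = N->Glued;
          Idx = 0; // reset per node
        }
      }
    };

    int main() {
      MiniNode B = {{true}, 0};
      MiniNode A = {{true, false}, &B};
      unsigned NumRegDefsLeft = 0;
      for (DefIter I(&A); I.valid(); I.advance())
        ++NumRegDefsLeft;
      std::printf("NumRegDefsLeft = %u\n", NumRegDefsLeft); // prints 2
      return 0;
    }
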
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
// Check to see if the scheduler cares about latencies.
if (ForceUnitLatencies()) {
@@ -429,20 +505,17 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
return;
}
- const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
- if (InstrItins.isEmpty()) {
+ if (!InstrItins || InstrItins->isEmpty()) {
SU->Latency = 1;
return;
}
-
+
// Compute the latency for the node. We use the sum of the latencies for
- // all nodes flagged together into this SUnit.
+ // all nodes glued together into this SUnit.
SU->Latency = 0;
- for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
- if (N->isMachineOpcode()) {
- SU->Latency += InstrItins.
- getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
- }
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode())
+ SU->Latency += TII->getInstrLatency(InstrItins, N);
}
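
An SUnit's latency is now just the sum over every machine node glued into it.
A sketch with invented per-node latencies standing in for the itinerary
query:

    struct MiniNode { bool IsMachineOp; unsigned Latency; const MiniNode *Glued; };

    static unsigned suLatency(const MiniNode *Bottom) {
      unsigned L = 0;
      for (const MiniNode *N = Bottom; N; N = N->Glued)
        if (N->IsMachineOp)
          L += N->Latency; // sum over the whole glued group
      return L;
    }

    int main() {
      MiniNode Top = {true, 2, 0};
      MiniNode Bottom = {true, 3, &Top};
      return suLatency(&Bottom) == 5 ? 0 : 1;
    }
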
void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
@@ -451,32 +524,25 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
if (ForceUnitLatencies())
return;
- const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
- if (InstrItins.isEmpty())
- return;
-
if (dep.getKind() != SDep::Data)
return;
unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
- if (Def->isMachineOpcode()) {
- const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
- if (DefIdx >= II.getNumDefs())
- return;
- int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx);
- if (DefCycle < 0)
- return;
- int UseCycle = 1;
- if (Use->isMachineOpcode()) {
- const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
- UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
- }
- if (UseCycle >= 0) {
- int Latency = DefCycle - UseCycle + 1;
- if (Latency >= 0)
- dep.setLatency(Latency);
- }
+ if (Use->isMachineOpcode())
+ // Adjust the use operand index by num of defs.
+ OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
+ int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
+ if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
+ !BB->succ_empty()) {
+ unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ // This copy is a liveout value. It is likely coalesced, so reduce the
+ // latency so as not to penalize the def.
+ // FIXME: need target specific adjustment here?
+ Latency = (Latency > 1) ? Latency - 1 : 1;
}
+ if (Latency >= 0)
+ dep.setLatency(Latency);
}
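
The liveout adjustment amounts to a one-cycle haircut when the use is a
CopyToReg of a virtual register, on the theory that the copy will be
coalesced away. A sketch with the target queries replaced by invented
inputs:

    static int adjustLatency(int Latency, bool UseIsLiveOutCopy) {
      if (Latency > 1 && UseIsLiveOutCopy)
        return Latency - 1; // likely coalesced; don't penalize the def
      return Latency;
    }

    int main() {
      return adjustLatency(3, true) == 2 ? 0 : 1;
    }
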
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
@@ -487,14 +553,14 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
SU->getNode()->dump(DAG);
dbgs() << "\n";
- SmallVector<SDNode *, 4> FlaggedNodes;
- for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
- FlaggedNodes.push_back(N);
- while (!FlaggedNodes.empty()) {
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
dbgs() << " ";
- FlaggedNodes.back()->dump(DAG);
+ GluedNodes.back()->dump(DAG);
dbgs() << "\n";
- FlaggedNodes.pop_back();
+ GluedNodes.pop_back();
}
}
@@ -507,37 +573,25 @@ namespace {
};
}
-// ProcessSourceNode - Process nodes with source order numbers. These are added
-// to a vector which EmitSchedule uses to determine how to insert dbg_value
-// instructions in the right order.
-static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
- InstrEmitter &Emitter,
- DenseMap<SDValue, unsigned> &VRBaseMap,
+/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
+static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
+ InstrEmitter &Emitter,
SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
- SmallSet<unsigned, 8> &Seen) {
- unsigned Order = DAG->GetOrdering(N);
- if (!Order || !Seen.insert(Order))
- return;
-
- MachineBasicBlock *BB = Emitter.getBlock();
- if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
- // Did not insert any instruction.
- Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
- return;
- }
-
- Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ unsigned Order) {
if (!N->getHasDebugValue())
return;
+
// Opportunistically insert immediate dbg_value uses, i.e. those with source
// order number right after the N.
+ MachineBasicBlock *BB = Emitter.getBlock();
MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N);
for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
if (DVs[i]->isInvalidated())
continue;
unsigned DVOrder = DVs[i]->getOrder();
- if (DVOrder == ++Order) {
+ if (!Order || DVOrder == ++Order) {
MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
if (DbgMI) {
Orders.push_back(std::make_pair(DVOrder, DbgMI));
@@ -548,6 +602,33 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
}
}
+// ProcessSourceNode - Process nodes with source order numbers. These are added
+// to a vector which EmitSchedule uses to determine how to insert dbg_value
+// instructions in the right order.
+static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
+ InstrEmitter &Emitter,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
+ SmallSet<unsigned, 8> &Seen) {
+ unsigned Order = DAG->GetOrdering(N);
+ if (!Order || !Seen.insert(Order)) {
+ // Process any valid SDDbgValues even if node does not have any order
+ // assigned.
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
+ return;
+ }
+
+ MachineBasicBlock *BB = Emitter.getBlock();
+ if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
+ // Did not insert any instruction.
+ Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
+ return;
+ }
+
+ Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
+}
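
Downstream, EmitSchedule sorts these (order, instruction) pairs and
interleaves dbg_values by source order. A sketch with string labels standing
in for MachineInstr pointers:

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<std::pair<unsigned, const char*> > Orders;
      Orders.push_back(std::make_pair(5u, "mul"));
      Orders.push_back(std::make_pair(2u, "add"));
      Orders.push_back(std::make_pair(7u, "store"));
      std::sort(Orders.begin(), Orders.end());
      // dbg_values with order in (LastOrder, Order] land after each pair's
      // instruction, mirroring the EmitSchedule loop below.
      for (std::size_t I = 0; I < Orders.size(); ++I)
        std::printf("%u -> %s\n", Orders[I].first, Orders[I].second);
      return 0;
    }
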
+
/// EmitSchedule - Emit the machine code in scheduled order.
MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
@@ -578,25 +659,25 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
}
// For pre-regalloc scheduling, create instructions corresponding to the
- // SDNode and any flagged SDNodes and append them to the block.
+ // SDNode and any glued SDNodes and append them to the block.
if (!SU->getNode()) {
// Emit a copy.
EmitPhysRegCopy(SU, CopyVRBaseMap);
continue;
}
- SmallVector<SDNode *, 4> FlaggedNodes;
- for (SDNode *N = SU->getNode()->getFlaggedNode(); N;
- N = N->getFlaggedNode())
- FlaggedNodes.push_back(N);
- while (!FlaggedNodes.empty()) {
- SDNode *N = FlaggedNodes.back();
- Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N;
+ N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ SDNode *N = GluedNodes.back();
+ Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
VRBaseMap);
// Remember the source order of the inserted instruction.
if (HasDbg)
ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
- FlaggedNodes.pop_back();
+ GluedNodes.pop_back();
}
Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
VRBaseMap);
@@ -625,16 +706,8 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
// Insert all SDDbgValue's whose order(s) are before "Order".
if (!MI)
continue;
-#ifndef NDEBUG
- unsigned LastDIOrder = 0;
-#endif
for (; DI != DE &&
(*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
-#ifndef NDEBUG
- assert((*DI)->getOrder() >= LastDIOrder &&
- "SDDbgValue nodes must be in source order!");
- LastDIOrder = (*DI)->getOrder();
-#endif
if ((*DI)->isInvalidated())
continue;
MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 842fc8c..cc7310e 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -20,13 +20,13 @@
namespace llvm {
/// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
- ///
+ ///
/// Edges between SUnits are initially based on edges in the SelectionDAG,
/// and additional edges can be added by the schedulers as heuristics.
/// SDNodes such as Constants, Registers, and a few others that are not
/// interesting to schedulers are not allocated SUnits.
///
- /// SDNodes with MVT::Flag operands are grouped along with the flagged
+ /// SDNodes with MVT::Glue operands are grouped along with the glued
/// nodes into a single SUnit so that they are scheduled together.
///
/// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output
@@ -36,6 +36,7 @@ namespace llvm {
class ScheduleDAGSDNodes : public ScheduleDAG {
public:
SelectionDAG *DAG; // DAG of the current basic block
+ const InstrItineraryData *InstrItins;
explicit ScheduleDAGSDNodes(MachineFunction &mf);
@@ -72,13 +73,17 @@ namespace llvm {
/// predecessors / successors info nor the temporary scheduling states.
///
SUnit *Clone(SUnit *N);
-
+
    /// BuildSchedGraph - Build the SUnit graph from the selection dag that we
    /// are given as input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// flagged together nodes with a single SUnit.
virtual void BuildSchedGraph(AliasAnalysis *AA);
+ /// InitNumRegDefsLeft - Determine the # of regs defined by this node.
+ ///
+ void InitNumRegDefsLeft(SUnit *SU);
+
/// ComputeLatency - Compute node latency.
///
virtual void ComputeLatency(SUnit *SU);
@@ -105,6 +110,30 @@ namespace llvm {
virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
+ /// RegDefIter - In place iteration over the values defined by an
+ /// SUnit. This does not need copies of the iterator or any other STLisms.
+ /// The iterator creates itself, rather than being provided by the SchedDAG.
+ class RegDefIter {
+ const ScheduleDAGSDNodes *SchedDAG;
+ const SDNode *Node;
+ unsigned DefIdx;
+ unsigned NodeNumDefs;
+ EVT ValueType;
+ public:
+ RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);
+
+ bool IsValid() const { return Node != NULL; }
+
+ EVT GetValue() const {
+ assert(IsValid() && "bad iterator");
+ return ValueType;
+ }
+
+ void Advance();
+ private:
+ void InitNodeNumDefs();
+ };
+
private:
/// ClusterNeighboringLoads - Cluster loads from "near" addresses into
/// combined SUnits.
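A minimal usage sketch for the RegDefIter introduced above; SD and SU are assumed to be a ScheduleDAGSDNodes and one of its SUnits, and only the interface declared in this header is exercised:

    for (ScheduleDAGSDNodes::RegDefIter I(SU, SD); I.IsValid(); I.Advance()) {
      EVT VT = I.GetValue(); // type of the next register value defined by SU
      (void)VT;              // e.g. count defs or inspect register classes here
    }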
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index ad06ebd..2fb2f2d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -31,7 +31,6 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetOptions.h"
@@ -44,7 +43,7 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -111,7 +110,7 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
/// BUILD_VECTOR where all of the elements are ~0 or undef.
bool ISD::isBuildVectorAllOnes(const SDNode *N) {
// Look through a bit convert.
- if (N->getOpcode() == ISD::BIT_CONVERT)
+ if (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -152,7 +151,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
/// BUILD_VECTOR where all of the elements are 0 or undef.
bool ISD::isBuildVectorAllZeros(const SDNode *N) {
// Look through a bit convert.
- if (N->getOpcode() == ISD::BIT_CONVERT)
+ if (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -199,6 +198,8 @@ bool ISD::isScalarToVector(const SDNode *N) {
if (N->getOperand(0).getOpcode() == ISD::UNDEF)
return false;
unsigned NumElems = N->getNumOperands();
+ if (NumElems == 1)
+ return false;
for (unsigned i = 1; i < NumElems; ++i) {
SDValue V = N->getOperand(i);
if (V.getOpcode() != ISD::UNDEF)
@@ -489,7 +490,7 @@ encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
/// doNotCSE - Return true if CSE should not be performed for this node.
static bool doNotCSE(SDNode *N) {
- if (N->getValueType(0) == MVT::Flag)
+ if (N->getValueType(0) == MVT::Glue)
return true; // Never CSE anything that produces a flag.
switch (N->getOpcode()) {
@@ -501,7 +502,7 @@ static bool doNotCSE(SDNode *N) {
// Check that remaining values produced are not flags.
for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
- if (N->getValueType(i) == MVT::Flag)
+ if (N->getValueType(i) == MVT::Glue)
return true; // Never CSE anything that produces a flag.
return false;
@@ -609,9 +610,6 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
bool Erased = false;
switch (N->getOpcode()) {
- case ISD::EntryToken:
- llvm_unreachable("EntryToken should not be in CSEMaps!");
- return false;
case ISD::HANDLENODE: return false; // noop.
case ISD::CONDCODE:
assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
@@ -641,6 +639,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
}
default:
// Remove it from the CSE Map.
+ assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!");
+ assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!");
Erased = CSEMap.RemoveNode(N);
break;
}
@@ -648,7 +648,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
// Verify that the node was actually in one of the CSE maps, unless it has a
// flag result (which cannot be CSE'd) or is one of the special cases that are
// not subject to CSE.
- if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag &&
+ if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&
!N->isMachineOpcode() && !doNotCSE(N)) {
N->dump(this);
dbgs() << "\n";
@@ -743,8 +743,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
return Node;
}
-/// VerifyNode - Sanity check the given node. Aborts if it is invalid.
-void SelectionDAG::VerifyNode(SDNode *N) {
+#ifndef NDEBUG
+/// VerifyNodeCommon - Sanity check the given node. Aborts if it is invalid.
+static void VerifyNodeCommon(SDNode *N) {
switch (N->getOpcode()) {
default:
break;
@@ -778,6 +779,44 @@ void SelectionDAG::VerifyNode(SDNode *N) {
}
}
+/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
+static void VerifySDNode(SDNode *N) {
+ // The SDNode allocators cannot be used to allocate nodes with fields that are
+ // not present in an SDNode!
+ assert(!isa<MemSDNode>(N) && "Bad MemSDNode!");
+ assert(!isa<ShuffleVectorSDNode>(N) && "Bad ShuffleVectorSDNode!");
+ assert(!isa<ConstantSDNode>(N) && "Bad ConstantSDNode!");
+ assert(!isa<ConstantFPSDNode>(N) && "Bad ConstantFPSDNode!");
+ assert(!isa<GlobalAddressSDNode>(N) && "Bad GlobalAddressSDNode!");
+ assert(!isa<FrameIndexSDNode>(N) && "Bad FrameIndexSDNode!");
+ assert(!isa<JumpTableSDNode>(N) && "Bad JumpTableSDNode!");
+ assert(!isa<ConstantPoolSDNode>(N) && "Bad ConstantPoolSDNode!");
+ assert(!isa<BasicBlockSDNode>(N) && "Bad BasicBlockSDNode!");
+ assert(!isa<SrcValueSDNode>(N) && "Bad SrcValueSDNode!");
+ assert(!isa<MDNodeSDNode>(N) && "Bad MDNodeSDNode!");
+ assert(!isa<RegisterSDNode>(N) && "Bad RegisterSDNode!");
+ assert(!isa<BlockAddressSDNode>(N) && "Bad BlockAddressSDNode!");
+ assert(!isa<EHLabelSDNode>(N) && "Bad EHLabelSDNode!");
+ assert(!isa<ExternalSymbolSDNode>(N) && "Bad ExternalSymbolSDNode!");
+ assert(!isa<CondCodeSDNode>(N) && "Bad CondCodeSDNode!");
+ assert(!isa<CvtRndSatSDNode>(N) && "Bad CvtRndSatSDNode!");
+ assert(!isa<VTSDNode>(N) && "Bad VTSDNode!");
+ assert(!isa<MachineSDNode>(N) && "Bad MachineSDNode!");
+
+ VerifyNodeCommon(N);
+}
+
+/// VerifyMachineNode - Sanity check the given MachineNode. Aborts if it is
+/// invalid.
+static void VerifyMachineNode(SDNode *N) {
+ // The MachineNode allocators cannot be used to allocate nodes with fields
+ // that are not present in a MachineNode!
+ // Currently there are no such nodes.
+
+ VerifyNodeCommon(N);
+}
+#endif // NDEBUG
+
/// getEVTAlignment - Compute the default alignment value for the
/// given type.
///
@@ -1315,7 +1354,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
-
+
SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
@@ -1365,11 +1404,11 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0);
ID.AddPointer(MD);
-
+
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
-
+
SDNode *N = new (NodeAllocator) MDNodeSDNode(MD);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
@@ -1613,7 +1652,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
    // Also compute a conservative estimate for high known-0 bits.
// More trickiness is possible, but this is sufficient for the
// interesting case of alignment computation.
- KnownOne.clear();
+ KnownOne.clearAllBits();
unsigned TrailZ = KnownZero.countTrailingOnes() +
KnownZero2.countTrailingOnes();
unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
@@ -1636,8 +1675,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
AllOnes, KnownZero2, KnownOne2, Depth+1);
unsigned LeadZ = KnownZero2.countLeadingOnes();
- KnownOne2.clear();
- KnownZero2.clear();
+ KnownOne2.clearAllBits();
+ KnownZero2.clearAllBits();
ComputeMaskedBits(Op.getOperand(1),
AllOnes, KnownZero2, KnownOne2, Depth+1);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
@@ -1765,7 +1804,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// If the sign extended bits are demanded, we know that the sign
// bit is demanded.
- InSignBit.zext(BitWidth);
+ InSignBit = InSignBit.zext(BitWidth);
if (NewBits.getBoolValue())
InputDemandedBits |= InSignBit;
@@ -1792,7 +1831,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::CTPOP: {
unsigned LowBits = Log2_32(BitWidth)+1;
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
- KnownOne.clear();
+ KnownOne.clearAllBits();
return;
}
case ISD::LOAD: {
@@ -1808,13 +1847,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
- APInt InMask = Mask;
- InMask.trunc(InBits);
- KnownZero.trunc(InBits);
- KnownOne.trunc(InBits);
+ APInt InMask = Mask.trunc(InBits);
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
KnownZero |= NewBits;
return;
}
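The resizing changes in this and the surrounding hunks track APInt's trunc/zext methods becoming value-returning rather than mutating in place; a minimal sketch of the new idiom, assuming LLVM's APInt:

    APInt V(64, 0xFFFF); // 64-bit value
    V = V.trunc(16);     // was: V.trunc(16); trunc now returns the narrowed value
    V = V.zext(64);      // was: V.zext(64); zext now returns the widened value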
@@ -1823,16 +1861,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
unsigned InBits = InVT.getScalarType().getSizeInBits();
APInt InSignBit = APInt::getSignBit(InBits);
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
- APInt InMask = Mask;
- InMask.trunc(InBits);
+ APInt InMask = Mask.trunc(InBits);
// If any of the sign extended bits are demanded, we know that the sign
// bit is demanded. Temporarily set this bit in the mask for our callee.
if (NewBits.getBoolValue())
InMask |= InSignBit;
- KnownZero.trunc(InBits);
- KnownOne.trunc(InBits);
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
// Note if the sign bit is known to be zero or one.
@@ -1844,13 +1881,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// If the sign bit wasn't actually demanded by our caller, we don't
// want it set in the KnownZero and KnownOne result values. Reset the
// mask and reapply it to the result values.
- InMask = Mask;
- InMask.trunc(InBits);
+ InMask = Mask.trunc(InBits);
KnownZero &= InMask;
KnownOne &= InMask;
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
// If the sign bit is known zero or one, the top bits match.
if (SignBitKnownZero)
@@ -1862,26 +1898,24 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::ANY_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt InMask = Mask;
- InMask.trunc(InBits);
- KnownZero.trunc(InBits);
- KnownOne.trunc(InBits);
+ APInt InMask = Mask.trunc(InBits);
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
return;
}
case ISD::TRUNCATE: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt InMask = Mask;
- InMask.zext(InBits);
- KnownZero.zext(InBits);
- KnownOne.zext(InBits);
+ APInt InMask = Mask.zext(InBits);
+ KnownZero = KnownZero.zext(InBits);
+ KnownOne = KnownOne.zext(InBits);
ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero.trunc(BitWidth);
- KnownOne.trunc(BitWidth);
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
break;
}
case ISD::AssertZext: {
@@ -1921,7 +1955,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
}
}
// fall through
- case ISD::ADD: {
+ case ISD::ADD:
+ case ISD::ADDE: {
// Output known-0 bits are known if clear or set in both the low clear bits
// common to both LHS & RHS. For example, 8+(X<<3) is known to have the
// low 3 bits clear.
@@ -1936,7 +1971,17 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
KnownZeroOut = std::min(KnownZeroOut,
KnownZero2.countTrailingOnes());
- KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+ if (Op.getOpcode() == ISD::ADD) {
+ KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+ return;
+ }
+
+ // With ADDE, a carry bit may be added in, so we can only use this
+ // information if we know (at least) that the low two bits are clear. We
+ // then return to the caller that the low bit is unknown but that other bits
+ // are known zero.
+ if (KnownZeroOut >= 2) // ADDE
+ KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut);
return;
}
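A standalone check of the ADDE reasoning above, using plain integers rather than DAG nodes: if both addends have their low k >= 2 bits clear, adding a carry c in {0,1} leaves bits [1, k) known zero, while bit 0 takes the value of c and is therefore unknown.

    #include <cassert>
    #include <cstdint>
    int main() {
      for (uint32_t a = 0; a < 256; a += 4)   // low two bits clear
        for (uint32_t b = 0; b < 256; b += 4) // low two bits clear
          for (uint32_t c = 0; c <= 1; ++c)
            assert((((a + b + c) >> 1) & 1) == 0); // bit 1 stays known zero
      return 0;
    }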
case ISD::SREM:
@@ -1991,10 +2036,19 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
- KnownOne.clear();
+ KnownOne.clearAllBits();
KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
return;
}
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ if (unsigned Align = InferPtrAlignment(Op)) {
+ // The low bits are known zero if the pointer is aligned.
+ KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align));
+ return;
+ }
+ break;
+
default:
// Allow the target to implement this method for its nodes.
if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
@@ -2234,6 +2288,25 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
}
+/// isBaseWithConstantOffset - Return true if the specified operand is an
+/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an
+/// ISD::OR with a ConstantSDNode that is guaranteed to have the same
+/// semantics as an ADD. This handles the equivalence:
+/// X|Cst == X+Cst iff X&Cst == 0.
+bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
+ if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
+ !isa<ConstantSDNode>(Op.getOperand(1)))
+ return false;
+
+ if (Op.getOpcode() == ISD::OR &&
+ !MaskedValueIsZero(Op.getOperand(0),
+ cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
+ return false;
+
+ return true;
+}
+
+
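A standalone check of the equivalence isBaseWithConstantOffset relies on: when X & Cst == 0, the OR cannot generate carries, so it behaves exactly like an ADD.

    #include <cassert>
    #include <cstdint>
    int main() {
      uint32_t X = 0xABCD0000, Cst = 0x1234;
      assert((X & Cst) == 0);         // the MaskedValueIsZero precondition
      assert((X | Cst) == (X + Cst)); // hence OR and ADD agree
      return 0;
    }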
bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
// If we're told that NaNs won't happen, assume they won't.
if (NoNaNsFPMath)
@@ -2295,7 +2368,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -2308,23 +2381,22 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
switch (Opcode) {
default: break;
case ISD::SIGN_EXTEND:
- return getConstant(APInt(Val).sextOrTrunc(VT.getSizeInBits()), VT);
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::TRUNCATE:
- return getConstant(APInt(Val).zextOrTrunc(VT.getSizeInBits()), VT);
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
- const uint64_t zero[] = {0, 0};
// No compile time operations on ppcf128.
if (VT == MVT::ppcf128) break;
- APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
+ APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
(void)apf.convertFromAPInt(Val,
Opcode==ISD::SINT_TO_FP,
APFloat::rmNearestTiesToEven);
return getConstantFP(apf, VT);
}
- case ISD::BIT_CONVERT:
+ case ISD::BITCAST:
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
return getConstantFP(Val.bitsToFloat(), VT);
else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
@@ -2375,7 +2447,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
APInt api(VT.getSizeInBits(), 2, x);
return getConstant(api, VT);
}
- case ISD::BIT_CONVERT:
+ case ISD::BITCAST:
if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
@@ -2477,13 +2549,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
return Operand.getNode()->getOperand(0);
}
break;
- case ISD::BIT_CONVERT:
+ case ISD::BITCAST:
// Basic sanity checking.
assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
- && "Cannot BIT_CONVERT between types of different sizes!");
+ && "Cannot BITCAST between types of different sizes!");
if (VT == Operand.getValueType()) return Operand; // noop conversion.
- if (OpOpcode == ISD::BIT_CONVERT) // bitconv(bitconv(x)) -> bitconv(x)
- return getNode(ISD::BIT_CONVERT, DL, VT, Operand.getOperand(0));
+ if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
+ return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
@@ -2519,7 +2591,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
SDNode *N;
SDVTList VTs = getVTList(VT);
- if (VT != MVT::Flag) { // Don't CSE flag producing nodes
+ if (VT != MVT::Glue) { // Don't CSE flag producing nodes
FoldingSetNodeID ID;
SDValue Ops[1] = { Operand };
AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
@@ -2535,7 +2607,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -2676,6 +2748,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
"Shift operators return type must be the same as their first arg");
assert(VT.isInteger() && N2.getValueType().isInteger() &&
"Shifts only work on integers");
+      // Verify that the shift amount VT is big enough to hold valid shift
+ // amounts. This catches things like trying to shift an i1024 value by an
+ // i8, which is easy to fall into in generic code that uses
+      // TLI.getShiftAmountTy().
+ assert(N2.getValueType().getSizeInBits() >=
+ Log2_32_Ceil(N1.getValueType().getSizeInBits()) &&
+ "Invalid use of small shift amount with oversized value!");
// Always fold shifts of i1 values so the code generator doesn't need to
// handle them. Since we know the size of the shift has to be less than the
@@ -2820,11 +2899,30 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
return getConstant(ShiftedVal.trunc(ElementSize), VT);
}
break;
- case ISD::EXTRACT_SUBVECTOR:
- if (N1.getValueType() == VT) // Trivial extraction.
- return N1;
+ case ISD::EXTRACT_SUBVECTOR: {
+ SDValue Index = N2;
+ if (VT.isSimple() && N1.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+             "Extract subvector VTs must be vectors!");
+ assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() &&
+ "Extract subvector VTs must have the same element type!");
+ assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ "Extract subvector must be from larger vector to smaller vector!");
+
+ if (isa<ConstantSDNode>(Index.getNode())) {
+ assert((VT.getVectorNumElements() +
+ cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+ <= N1.getValueType().getVectorNumElements())
+ && "Extract subvector overflow!");
+ }
+
+ // Trivial extraction.
+ if (VT.getSimpleVT() == N1.getValueType().getSimpleVT())
+ return N1;
+ }
break;
}
+ }
if (N1C) {
if (N2C) {
@@ -2961,7 +3059,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
// Memoize this node if possible.
SDNode *N;
SDVTList VTs = getVTList(VT);
- if (VT != MVT::Flag) {
+ if (VT != MVT::Glue) {
SDValue Ops[] = { N1, N2 };
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
@@ -2977,7 +3075,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -3019,7 +3117,31 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
break;
- case ISD::BIT_CONVERT:
+ case ISD::INSERT_SUBVECTOR: {
+ SDValue Index = N3;
+ if (VT.isSimple() && N1.getValueType().isSimple()
+ && N2.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ N2.getValueType().isVector() &&
+             "Insert subvector VTs must be vectors");
+ assert(VT == N1.getValueType() &&
+ "Dest and insert subvector source types must match!");
+ assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ "Insert subvector must be from smaller vector to larger vector!");
+ if (isa<ConstantSDNode>(Index.getNode())) {
+ assert((N2.getValueType().getVectorNumElements() +
+ cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+ <= VT.getVectorNumElements())
+ && "Insert subvector overflow!");
+ }
+
+ // Trivial insertion.
+ if (VT.getSimpleVT() == N2.getValueType().getSimpleVT())
+ return N2;
+ }
+ break;
+ }
+ case ISD::BITCAST:
    // Fold bitcast nodes from a type to themselves.
if (N1.getValueType() == VT)
return N1;
@@ -3029,7 +3151,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
// Memoize node if it doesn't produce a flag.
SDNode *N;
SDVTList VTs = getVTList(VT);
- if (VT != MVT::Flag) {
+ if (VT != MVT::Glue) {
SDValue Ops[] = { N1, N2, N3 };
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
@@ -3045,7 +3167,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -3087,6 +3209,17 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
&ArgChains[0], ArgChains.size());
}
+/// SplatByte - Distribute ByteVal over NumBits bits.
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
+ APInt Val = APInt(NumBits, ByteVal);
+ unsigned Shift = 8;
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Val = (Val << Shift) | Val;
+ Shift <<= 1;
+ }
+ return Val;
+}
+
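The doubling loop converges in log2(NumBits/8) steps; the same scheme on a plain uint64_t, as a standalone sanity check (illustrative only, not LLVM code):

    #include <cassert>
    #include <cstdint>
    static uint64_t splatByte(unsigned NumBits, uint8_t ByteVal) {
      uint64_t Val = ByteVal;
      for (unsigned i = NumBits, Shift = 8; i > 8; i >>= 1, Shift <<= 1)
        Val = (Val << Shift) | Val; // double the populated width each pass
      return Val;
    }
    int main() {
      assert(splatByte(32, 0xAB) == 0xABABABABULL);
      assert(splatByte(64, 0x5A) == 0x5A5A5A5A5A5A5A5AULL);
      return 0;
    }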
/// getMemsetValue - Vectorized representation of the memset value
/// operand.
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
@@ -3095,27 +3228,18 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
unsigned NumBits = VT.getScalarType().getSizeInBits();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
- APInt Val = APInt(NumBits, C->getZExtValue() & 255);
- unsigned Shift = 8;
- for (unsigned i = NumBits; i > 8; i >>= 1) {
- Val = (Val << Shift) | Val;
- Shift <<= 1;
- }
+ APInt Val = SplatByte(NumBits, C->getZExtValue() & 255);
if (VT.isInteger())
return DAG.getConstant(Val, VT);
return DAG.getConstantFP(APFloat(Val), VT);
}
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
- unsigned Shift = 8;
- for (unsigned i = NumBits; i > 8; i >>= 1) {
- Value = DAG.getNode(ISD::OR, dl, VT,
- DAG.getNode(ISD::SHL, dl, VT, Value,
- DAG.getConstant(Shift,
- TLI.getShiftAmountTy())),
- Value);
- Shift <<= 1;
+ if (NumBits > 8) {
+ // Use a multiplication with 0x010101... to extend the input to the
+ // required length.
+ APInt Magic = SplatByte(NumBits, 0x01);
+ Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
}
return Value;
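The non-constant path above now splats the byte with one multiply by the 0x0101...01 pattern instead of a shift/OR chain; the identity it depends on, checked standalone for 32 bits:

    #include <cassert>
    #include <cstdint>
    int main() {
      for (uint32_t b = 0; b < 256; ++b) // a zero-extended byte cannot carry
        assert(b * 0x01010101u == ((b << 24) | (b << 16) | (b << 8) | b));
      return 0;
    }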
@@ -3131,13 +3255,12 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
if (Str.empty()) {
if (VT.isInteger())
return DAG.getConstant(0, VT);
- else if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
- VT.getSimpleVT().SimpleTy == MVT::f64)
+ else if (VT == MVT::f32 || VT == MVT::f64)
return DAG.getConstantFP(0.0, VT);
else if (VT.isVector()) {
unsigned NumElts = VT.getVectorNumElements();
MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),
EltVT, NumElts)));
} else
@@ -3234,15 +3357,6 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
if (VT.bitsGT(LVT))
VT = LVT;
}
-
- // If we're optimizing for size, and there is a limit, bump the maximum number
- // of operations inserted down to 4. This is a wild guess that approximates
- // the size of a call to memcpy or memset (3 arguments + call).
- if (Limit != ~0U) {
- const Function *F = DAG.getMachineFunction().getFunction();
- if (F->hasFnAttr(Attribute::OptimizeForSize))
- Limit = 4;
- }
unsigned NumMemOps = 0;
while (Size != 0) {
@@ -3276,18 +3390,22 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
SDValue Src, uint64_t Size,
unsigned Align, bool isVol,
bool AlwaysInline,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff) {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
// Turn a memcpy of undef to nop.
if (Src.getOpcode() == ISD::UNDEF)
return Chain;
// Expand memcpy to a series of load and store ops if the size operand falls
// below a certain threshold.
+ // TODO: In the AlwaysInline case, if the size is big then generate a loop
+ // rather than a potentially huge number of loads and stores.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3297,8 +3415,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
std::string Str;
bool CopyFromStr = isMemSrcFromString(Src, Str);
bool isZeroStr = CopyFromStr && Str.empty();
- unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy();
-
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
+
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
(isZeroStr ? 0 : SrcAlign),
@@ -3334,7 +3452,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, isVol, false, Align);
+ DstPtrInfo.getWithOffset(DstOff), isVol,
+ false, Align);
} else {
// The type might not be legal for the target. This should only happen
// if the type is smaller than a legal type, as on PPC, so the right
@@ -3343,14 +3462,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// FIXME does the case above also need this?
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
assert(NVT.bitsGE(VT));
- Value = DAG.getExtLoad(ISD::EXTLOAD, NVT, dl, Chain,
+ Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
- SrcSV, SrcSVOff + SrcOff, VT, isVol, false,
+ SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false,
MinAlign(SrcAlign, SrcOff));
Store = DAG.getTruncStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, VT, isVol, false,
- Align);
+ DstPtrInfo.getWithOffset(DstOff), VT, isVol,
+ false, Align);
}
OutChains.push_back(Store);
SrcOff += VTSize;
@@ -3366,8 +3485,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
SDValue Src, uint64_t Size,
unsigned Align, bool isVol,
bool AlwaysInline,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff) {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
// Turn a memmove of undef to nop.
if (Src.getOpcode() == ISD::UNDEF)
return Chain;
@@ -3377,14 +3496,16 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
SrcAlign = Align;
- unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove();
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
@@ -3414,7 +3535,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
Value = DAG.getLoad(VT, dl, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
- SrcSV, SrcSVOff + SrcOff, isVol, false, SrcAlign);
+ SrcPtrInfo.getWithOffset(SrcOff), isVol,
+ false, SrcAlign);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
@@ -3429,7 +3551,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
Store = DAG.getStore(Chain, dl, LoadValues[i],
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, isVol, false, Align);
+ DstPtrInfo.getWithOffset(DstOff), isVol, false, Align);
OutChains.push_back(Store);
DstOff += VTSize;
}
@@ -3442,7 +3564,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
SDValue Chain, SDValue Dst,
SDValue Src, uint64_t Size,
unsigned Align, bool isVol,
- const Value *DstSV, uint64_t DstSVOff) {
+ MachinePointerInfo DstPtrInfo) {
// Turn a memset of undef to nop.
if (Src.getOpcode() == ISD::UNDEF)
return Chain;
@@ -3452,13 +3574,15 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
bool NonScalarIntSafe =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
- if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(),
+ if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
Size, (DstAlignCanChange ? 0 : Align), 0,
NonScalarIntSafe, false, DAG, TLI))
return SDValue();
@@ -3477,15 +3601,34 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
SmallVector<SDValue, 8> OutChains;
uint64_t DstOff = 0;
unsigned NumMemOps = MemOps.size();
+
+ // Find the largest store and generate the bit pattern for it.
+ EVT LargestVT = MemOps[0];
+ for (unsigned i = 1; i < NumMemOps; i++)
+ if (MemOps[i].bitsGT(LargestVT))
+ LargestVT = MemOps[i];
+ SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
+
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
- unsigned VTSize = VT.getSizeInBits() / 8;
- SDValue Value = getMemsetValue(Src, VT, DAG, dl);
+
+    // If this store is smaller than the largest store, see whether we can get
+ // the smaller value for free with a truncate.
+ SDValue Value = MemSetValue;
+ if (VT.bitsLT(LargestVT)) {
+ if (!LargestVT.isVector() && !VT.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
+ else
+ Value = getMemsetValue(Src, VT, DAG, dl);
+ }
+ assert(Value.getValueType() == VT && "Value with wrong type.");
SDValue Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, isVol, false, 0);
+ DstPtrInfo.getWithOffset(DstOff),
+ isVol, false, Align);
OutChains.push_back(Store);
- DstOff += VTSize;
+ DstOff += VT.getSizeInBits() / 8;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
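The loop above computes the memset pattern once at the largest store type and, when the target reports the truncation as free, derives the narrower tail-store values from it; the underlying arithmetic, standalone:

    #include <cassert>
    #include <cstdint>
    int main() {
      uint8_t Byte = 0x5A;
      uint64_t Wide = Byte * 0x0101010101010101ULL; // pattern for 8-byte stores
      uint16_t Tail = (uint16_t)Wide;               // truncated for a 2-byte tail
      assert(Wide == 0x5A5A5A5A5A5A5A5AULL && Tail == 0x5A5A);
      return 0;
    }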
@@ -3495,8 +3638,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol, bool AlwaysInline,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff) {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
// Check to see if we should lower the memcpy to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3508,7 +3651,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(),Align,
- isVol, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ isVol, false, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
@@ -3518,7 +3661,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Result =
TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
isVol, AlwaysInline,
- DstSV, DstSVOff, SrcSV, SrcSVOff);
+ DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
@@ -3528,7 +3671,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
assert(ConstantSize && "AlwaysInline requires a constant size!");
return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(), Align, isVol,
- true, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ true, DstPtrInfo, SrcPtrInfo);
}
// FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
@@ -3559,8 +3702,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff) {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
// Check to see if we should lower the memmove to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3573,7 +3716,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Result =
getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(), Align, isVol,
- false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ false, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
@@ -3582,7 +3725,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
// code. If the target chooses to do this, this is the next best.
SDValue Result =
TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
- DstSV, DstSVOff, SrcSV, SrcSVOff);
+ DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
@@ -3611,7 +3754,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol,
- const Value *DstSV, uint64_t DstSVOff) {
+ MachinePointerInfo DstPtrInfo) {
// Check to see if we should lower the memset to stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3623,7 +3766,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Result =
getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
- Align, isVol, DstSV, DstSVOff);
+ Align, isVol, DstPtrInfo);
if (Result.getNode())
return Result;
@@ -3633,11 +3776,11 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
// code. If the target chooses to do this, this is the next best.
SDValue Result =
TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
- DstSV, DstSVOff);
+ DstPtrInfo);
if (Result.getNode())
return Result;
- // Emit a library call.
+ // Emit a library call.
const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -3669,19 +3812,12 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
- SDValue Chain,
- SDValue Ptr, SDValue Cmp,
- SDValue Swp, const Value* PtrVal,
+ SDValue Chain, SDValue Ptr, SDValue Cmp,
+ SDValue Swp, MachinePointerInfo PtrInfo,
unsigned Alignment) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
- // Check if the memory reference references a frame index
- if (!PtrVal)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex());
-
MachineFunction &MF = getMachineFunction();
unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
@@ -3689,8 +3825,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
Flags |= MachineMemOperand::MOVolatile;
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrVal, Flags, 0,
- MemVT.getStoreSize(), Alignment);
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO);
}
@@ -3729,12 +3864,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
- // Check if the memory reference references a frame index
- if (!PtrVal)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex());
-
MachineFunction &MF = getMachineFunction();
unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
@@ -3742,7 +3871,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
Flags |= MachineMemOperand::MOVolatile;
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrVal, Flags, 0,
+ MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
MemVT.getStoreSize(), Alignment);
return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
@@ -3785,7 +3914,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
}
/// getMergeValues - Create a MERGE_VALUES node from the given operands.
-/// Allowed to return something different (and simpler) if Simplify is true.
SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps,
DebugLoc dl) {
if (NumOps == 1)
@@ -3803,18 +3931,18 @@ SDValue
SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl,
const EVT *VTs, unsigned NumVTs,
const SDValue *Ops, unsigned NumOps,
- EVT MemVT, const Value *srcValue, int SVOff,
+ EVT MemVT, MachinePointerInfo PtrInfo,
unsigned Align, bool Vol,
bool ReadMem, bool WriteMem) {
return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps,
- MemVT, srcValue, SVOff, Align, Vol,
+ MemVT, PtrInfo, Align, Vol,
ReadMem, WriteMem);
}
SDValue
SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
const SDValue *Ops, unsigned NumOps,
- EVT MemVT, const Value *srcValue, int SVOff,
+ EVT MemVT, MachinePointerInfo PtrInfo,
unsigned Align, bool Vol,
bool ReadMem, bool WriteMem) {
if (Align == 0) // Ensure that codegen never sees alignment 0
@@ -3829,8 +3957,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
if (Vol)
Flags |= MachineMemOperand::MOVolatile;
MachineMemOperand *MMO =
- MF.getMachineMemOperand(srcValue, Flags, SVOff,
- MemVT.getStoreSize(), Align);
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align);
return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
}
@@ -3841,13 +3968,14 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
EVT MemVT, MachineMemOperand *MMO) {
assert((Opcode == ISD::INTRINSIC_VOID ||
Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::PREFETCH ||
(Opcode <= INT_MAX &&
(int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
"Opcode is not a memory-accessing opcode!");
// Memoize the node unless it returns a flag.
MemIntrinsicSDNode *N;
- if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
void *IP = 0;
@@ -3867,36 +3995,70 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
return SDValue(N, 0);
}
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) {
+ // If this is FI+Offset, we can model it.
+ if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
+ return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset);
+
+ // If this is (FI+Offset1)+Offset2, we can model it.
+ if (Ptr.getOpcode() != ISD::ADD ||
+ !isa<ConstantSDNode>(Ptr.getOperand(1)) ||
+ !isa<FrameIndexSDNode>(Ptr.getOperand(0)))
+ return MachinePointerInfo();
+
+ int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ return MachinePointerInfo::getFixedStack(FI, Offset+
+ cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
+}
+
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) {
+ // If the 'Offset' value isn't a constant, we can't handle this.
+ if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
+ return InferPointerInfo(Ptr, OffsetNode->getSExtValue());
+ if (OffsetOp.getOpcode() == ISD::UNDEF)
+ return InferPointerInfo(Ptr);
+ return MachinePointerInfo();
+}
+
+
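A summary of the shapes the two static helpers above recognize (right-hand side is the inferred MachinePointerInfo), restated from the code above:

    // (FrameIndex FI)                         -> getFixedStack(FI, 0)
    // (add (FrameIndex FI), ConstantSDNode C) -> getFixedStack(FI, C)
    // undef offset operand                    -> treated as offset 0
    // anything else                           -> MachinePointerInfo() (unknown)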
SDValue
SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, DebugLoc dl, SDValue Chain,
SDValue Ptr, SDValue Offset,
- const Value *SV, int SVOffset, EVT MemVT,
+ MachinePointerInfo PtrInfo, EVT MemVT,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ unsigned Alignment, const MDNode *TBAAInfo) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(VT);
- // Check if the memory reference references a frame index
- if (!SV)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- SV = PseudoSourceValue::getFixedStack(FI->getIndex());
-
- MachineFunction &MF = getMachineFunction();
unsigned Flags = MachineMemOperand::MOLoad;
if (isVolatile)
Flags |= MachineMemOperand::MOVolatile;
if (isNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
+
+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+ // clients.
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr, Offset);
+
+ MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(SV, Flags, SVOffset,
- MemVT.getStoreSize(), Alignment);
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
+ TBAAInfo);
return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}
SDValue
-SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, DebugLoc dl, SDValue Chain,
SDValue Ptr, SDValue Offset, EVT MemVT,
MachineMemOperand *MMO) {
@@ -3943,25 +4105,26 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
SDValue Chain, SDValue Ptr,
- const Value *SV, int SVOffset,
+ MachinePointerInfo PtrInfo,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ unsigned Alignment, const MDNode *TBAAInfo) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
- SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment);
+ PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
}
-SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, EVT VT, DebugLoc dl,
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
SDValue Chain, SDValue Ptr,
- const Value *SV,
- int SVOffset, EVT MemVT,
+ MachinePointerInfo PtrInfo, EVT MemVT,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ unsigned Alignment, const MDNode *TBAAInfo) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
- SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment);
+ PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment,
+ TBAAInfo);
}
+
SDValue
SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
SDValue Offset, ISD::MemIndexedMode AM) {
@@ -3969,33 +4132,32 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
"Load is already a indexed load!");
return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
- LD->getChain(), Base, Offset, LD->getSrcValue(),
- LD->getSrcValueOffset(), LD->getMemoryVT(),
+ LD->getChain(), Base, Offset, LD->getPointerInfo(),
+ LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment());
}
SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
- SDValue Ptr, const Value *SV, int SVOffset,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ unsigned Alignment, const MDNode *TBAAInfo) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(Val.getValueType());
- // Check if the memory reference references a frame index
- if (!SV)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- SV = PseudoSourceValue::getFixedStack(FI->getIndex());
-
- MachineFunction &MF = getMachineFunction();
unsigned Flags = MachineMemOperand::MOStore;
if (isVolatile)
Flags |= MachineMemOperand::MOVolatile;
if (isNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
+
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr);
+
+ MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(SV, Flags, SVOffset,
- Val.getValueType().getStoreSize(), Alignment);
+ MF.getMachineMemOperand(PtrInfo, Flags,
+ Val.getValueType().getStoreSize(), Alignment,
+ TBAAInfo);
return getStore(Chain, dl, Val, Ptr, MMO);
}
@@ -4024,27 +4186,26 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
}
SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
- SDValue Ptr, const Value *SV,
- int SVOffset, EVT SVT,
- bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ EVT SVT,bool isVolatile, bool isNonTemporal,
+ unsigned Alignment,
+ const MDNode *TBAAInfo) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(SVT);
- // Check if the memory reference references a frame index
- if (!SV)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- SV = PseudoSourceValue::getFixedStack(FI->getIndex());
-
- MachineFunction &MF = getMachineFunction();
unsigned Flags = MachineMemOperand::MOStore;
if (isVolatile)
Flags |= MachineMemOperand::MOVolatile;
if (isNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
+
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr);
+
+ MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment);
+ MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment,
+ TBAAInfo);
return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
}
@@ -4170,7 +4331,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
SDNode *N;
SDVTList VTs = getVTList(VT);
- if (VT != MVT::Flag) {
+ if (VT != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
void *IP = 0;
@@ -4186,7 +4347,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -4236,7 +4397,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
// Memoize the node unless it returns a flag.
SDNode *N;
- if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
void *IP = 0;
@@ -4268,7 +4429,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
}
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -4645,7 +4806,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
unsigned NumOps) {
// If an identical node already exists, use it.
void *IP = 0;
- if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) {
+ if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
@@ -4845,9 +5006,9 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
const SDValue *Ops, unsigned NumOps) {
- bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Flag;
+ bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
MachineSDNode *N;
- void *IP;
+ void *IP = 0;
if (DoCSE) {
FoldingSetNodeID ID;
@@ -4876,7 +5037,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifyMachineNode(N);
#endif
return N;
}
@@ -4907,7 +5068,7 @@ SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT,
/// else return NULL.
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
const SDValue *Ops, unsigned NumOps) {
- if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
void *IP = 0;
@@ -5340,6 +5501,29 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
SD->setHasDebugValue(true);
}
+/// TransferDbgValues - Transfer SDDbgValues.
+void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
+ if (From == To || !From.getNode()->getHasDebugValue())
+ return;
+ SDNode *FromNode = From.getNode();
+ SDNode *ToNode = To.getNode();
+ SmallVector<SDDbgValue *, 2> &DVs = GetDbgValues(FromNode);
+ SmallVector<SDDbgValue *, 2> ClonedDVs;
+ for (SmallVector<SDDbgValue *, 2>::iterator I = DVs.begin(), E = DVs.end();
+ I != E; ++I) {
+ SDDbgValue *Dbg = *I;
+ if (Dbg->getKind() == SDDbgValue::SDNODE) {
+ SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(),
+ Dbg->getOffset(), Dbg->getDebugLoc(),
+ Dbg->getOrder());
+ ClonedDVs.push_back(Clone);
+ }
+ }
+ for (SmallVector<SDDbgValue *, 2>::iterator I = ClonedDVs.begin(),
+ E = ClonedDVs.end(); I != E; ++I)
+ AddDbgValue(*I, ToNode, false);
+}
+
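A hedged usage sketch for the new hook: when a rewrite replaces one value with another, forwarding the debug values keeps dbg_value records attached to the surviving node. Everything here except SelectionDAG::TransferDbgValues itself is an assumed fragment of a hypothetical combine:

    SDValue replaceWithAdd(SelectionDAG &DAG, SDValue Op, SDValue A, SDValue B,
                           DebugLoc dl, EVT VT) {
      SDValue NewOp = DAG.getNode(ISD::ADD, dl, VT, A, B);
      DAG.TransferDbgValues(Op, NewOp); // Op's SDDbgValues now describe NewOp
      return NewOp;
    }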
//===----------------------------------------------------------------------===//
// SDNode Class
//===----------------------------------------------------------------------===//
@@ -5367,7 +5551,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
}
MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
- const SDValue *Ops, unsigned NumOps, EVT memvt,
+ const SDValue *Ops, unsigned NumOps, EVT memvt,
MachineMemOperand *mmo)
: SDNode(Opc, dl, VTs, Ops, NumOps),
MemoryVT(memvt), MMO(mmo) {
@@ -5386,7 +5570,7 @@ void SDNode::Profile(FoldingSetNodeID &ID) const {
namespace {
struct EVTArray {
std::vector<EVT> VTs;
-
+
EVTArray() {
VTs.reserve(MVT::LAST_VALUETYPE);
for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i)
@@ -5406,7 +5590,7 @@ const EVT *SDNode::getValueTypeList(EVT VT) {
sys::SmartScopedLock<true> Lock(*VTMutex);
return &(*EVTs->insert(VT).first);
} else {
- assert(VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE &&
+ assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
"Value type out of range!");
return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
}
@@ -5478,9 +5662,9 @@ bool SDNode::isOperandOf(SDNode *N) const {
/// reachesChainWithoutSideEffects - Return true if this operand (which must
/// be a chain) reaches the specified operand without crossing any
-/// side-effecting instructions. In practice, this looks through token
-/// factors and non-volatile loads. In order to remain efficient, this only
-/// looks a couple of nodes in, it does not do an exhaustive search.
+/// side-effecting instructions on any chain path. In practice, this looks
+/// through token factors and non-volatile loads. In order to remain efficient,
+/// this only looks a couple of nodes in, it does not do an exhaustive search.
bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
unsigned Depth) const {
if (*this == Dest) return true;
@@ -5490,12 +5674,12 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
if (Depth == 0) return false;
// If this is a token factor, all inputs to the TF happen in parallel. If any
- // of the operands of the TF reach dest, then we can do the xform.
+ // of the operands of the TF does not reach dest, then we cannot do the xform.
if (getOpcode() == ISD::TokenFactor) {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
- return true;
- return false;
+ if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
+ return false;
+ return true;
}
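A concrete case the quantifier flip above fixes:

    // TF = TokenFactor(Dest, (store ...))
    //   old: the Dest operand reaches Dest -> returned true, hiding the store
    //        on the parallel chain
    //   new: the store operand fails       -> returns false, as required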
// Loads don't have side effects, look through them.
@@ -5600,6 +5784,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::EH_RETURN: return "EH_RETURN";
case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+ case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP";
case ISD::ConstantPool: return "ConstantPool";
case ISD::ExternalSymbol: return "ExternalSymbol";
case ISD::BlockAddress: return "BlockAddress";
@@ -5690,6 +5875,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::INSERT_SUBVECTOR: return "insert_subvector";
case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
@@ -5723,7 +5909,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UINT_TO_FP: return "uint_to_fp";
case ISD::FP_TO_SINT: return "fp_to_sint";
case ISD::FP_TO_UINT: return "fp_to_uint";
- case ISD::BIT_CONVERT: return "bit_convert";
+ case ISD::BITCAST: return "bit_convert";
case ISD::FP16_TO_FP32: return "fp16_to_fp32";
case ISD::FP32_TO_FP16: return "fp32_to_fp16";
@@ -5935,12 +6121,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << LBB->getName() << " ";
OS << (const void*)BBDN->getBasicBlock() << ">";
} else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
- if (G && R->getReg() &&
- TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
- OS << " %" << G->getTarget().getRegisterInfo()->getName(R->getReg());
- } else {
- OS << " %reg" << R->getReg();
- }
+ OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
} else if (const ExternalSymbolSDNode *ES =
dyn_cast<ExternalSymbolSDNode>(this)) {
OS << "'" << ES->getSymbol() << "'";
@@ -5986,7 +6167,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
const char *AM = getIndexedModeName(ST->getAddressingMode());
if (*AM)
OS << ", " << AM;
-
+
OS << ">";
} else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
OS << "<" << *M->getMemOperand() << ">";
@@ -6037,7 +6218,7 @@ void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
const SelectionDAG *G, unsigned depth,
- unsigned indent)
+ unsigned indent)
{
if (depth == 0)
return;
@@ -6058,7 +6239,7 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
unsigned depth) const {
printrWithDepthHelper(OS, this, G, depth, 0);
-}
+}
void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
// Don't print impossibly deep things.
@@ -6072,7 +6253,7 @@ void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
void SDNode::dumprFull(const SelectionDAG *G) const {
// Don't print impossibly deep things.
dumprWithDepth(G, 100);
-}
+}
static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
@@ -6156,10 +6337,10 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
}
-/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
-/// location that is 'Dist' units away from the location that the 'Base' load
+/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+/// location that is 'Dist' units away from the location that the 'Base' load
/// is loading from.
-bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
+bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
unsigned Bytes, int Dist) const {
if (LD->getChain() != Base->getChain())
return false;
@@ -6180,11 +6361,11 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
if (FS != BFS || FS != (int)Bytes) return false;
return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
}
- if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
- if (V && (V->getSExtValue() == Dist*Bytes))
- return true;
- }
+
+ // Handle X+C
+ if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
+ cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
+ return true;
const GlobalValue *GV1 = NULL;
const GlobalValue *GV2 = NULL;
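The new isBaseWithConstantOffset call subsumes the old hand-rolled ISD::ADD match and also covers the or-as-add idiom, where an OR folds in offset bits known to be clear in the base. A self-contained check of the identity such a match leans on (plain integers standing in for SDValues):

#include <cassert>
#include <cstdint>

// An OR with bits known clear in the base behaves exactly like an ADD:
//   (X | C) == X + C   whenever   (X & C) == 0.
static bool orActsLikeAdd(uint64_t X, uint64_t C) {
  return (X & C) == 0 && (X | C) == X + C;
}

int main() {
  // A 16-byte aligned base has its low four bits clear, so OR-ing in a
  // small offset is the same as adding it -- Loc = Base + Dist*Bytes.
  assert(orActsLikeAdd(0x1000, 0x8));
  // With overlapping bits the identity (and the match) no longer holds.
  assert(!orActsLikeAdd(0x1008, 0x8));
  return 0;
}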
@@ -6225,15 +6406,14 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
int64_t FrameOffset = 0;
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
FrameIdx = FI->getIndex();
- } else if (Ptr.getOpcode() == ISD::ADD &&
- isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+ } else if (isBaseWithConstantOffset(Ptr) &&
isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ // Handle FI+Cst
FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
FrameOffset = Ptr.getConstantOperandVal(1);
}
if (FrameIdx != (1 << 31)) {
- // FIXME: Handle FI+CST.
const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
FrameOffset);
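With the FIXME resolved, the alignment of FI+Offset is bounded by MinAlign: the largest power of two dividing both the object alignment and the offset. A small reimplementation of that helper with a worked case (a sketch; the real llvm::MinAlign lives in Support/MathExtras.h):

#include <cassert>
#include <cstdint>

// Lowest set bit of (A | B): the largest power of two dividing both,
// which bounds the alignment of a FrameIndex + constant offset access.
static uint64_t MinAlign(uint64_t A, uint64_t B) {
  return (A | B) & (~(A | B) + 1);
}

int main() {
  // A 16-byte aligned frame object read at offset 4 only guarantees
  // 4-byte alignment for the derived pointer.
  assert(MinAlign(16, 4) == 4);
  // Offset 0 contributes no bits, so the full alignment survives.
  assert(MinAlign(16, 0) == 16);
  return 0;
}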
@@ -6354,7 +6534,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
if (OpVal.getOpcode() == ISD::UNDEF)
SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
- SplatValue |= APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize).
+ SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize).
zextOrTrunc(sz) << BitPos;
else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos;
@@ -6369,10 +6549,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
while (sz > 8) {
unsigned HalfSize = sz / 2;
- APInt HighValue = APInt(SplatValue).lshr(HalfSize).trunc(HalfSize);
- APInt LowValue = APInt(SplatValue).trunc(HalfSize);
- APInt HighUndef = APInt(SplatUndef).lshr(HalfSize).trunc(HalfSize);
- APInt LowUndef = APInt(SplatUndef).trunc(HalfSize);
+ APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize);
+ APInt LowValue = SplatValue.trunc(HalfSize);
+ APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize);
+ APInt LowUndef = SplatUndef.trunc(HalfSize);
// If the two halves do not match (ignoring undef bits), stop here.
if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
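The APInt changes here are mechanical (lshr/trunc now return new values rather than mutating in place), but the loop they serve is worth restating: a constant is a splat of sz/2-bit elements exactly when its two halves agree, applied repeatedly until the element size is found. A standalone version over plain integers, with the undef tracking omitted:

#include <cassert>
#include <cstdint>

// Smallest element size (in bits) such that Val, viewed as Width bits
// (Width <= 64, a power of two), is a repetition of that element --
// the same repeated halving isConstantSplat performs on APInts.
static unsigned splatElementBits(uint64_t Val, unsigned Width) {
  while (Width > 8) {
    unsigned Half = Width / 2;
    uint64_t Mask = (1ULL << Half) - 1;
    uint64_t Lo = Val & Mask;
    uint64_t Hi = (Val >> Half) & Mask;
    if (Lo != Hi)
      break;              // halves disagree: current width is the answer
    Val = Lo;
    Width = Half;
  }
  return Width;
}

int main() {
  assert(splatElementBits(0x01010101, 32) == 8);  // splat of 0x01
  assert(splatElementBits(0x00010001, 32) == 16); // splat of 0x0001
  assert(splatElementBits(0x12345678, 32) == 32); // not a splat
  return 0;
}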
@@ -6412,7 +6592,7 @@ static void checkForCyclesHelper(const SDNode *N,
// If this node has already been checked, don't check it again.
if (Checked.count(N))
return;
-
+
// If a node has already been visited on this depth-first walk, reject it as
// a cycle.
if (!Visited.insert(N)) {
@@ -6421,10 +6601,10 @@ static void checkForCyclesHelper(const SDNode *N,
errs() << "Detected cycle in SelectionDAG\n";
abort();
}
-
+
for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked);
-
+
Checked.insert(N);
Visited.erase(N);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e657445..452f561 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -15,6 +15,7 @@
#include "SDNodeDbgValue.h"
#include "SelectionDAGBuilder.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -43,9 +44,8 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -70,10 +70,28 @@ LimitFPPrecision("limit-float-precision",
cl::location(LimitFloatPrecision),
cl::init(0));
+// Limit the width of DAG chains. This is important in general to prevent
+// DAG-based analysis from blowing up. For example, alias analysis and
+// load clustering may not complete in reasonable time. It is difficult to
+// recognize and avoid this situation within each individual analysis, and
+// future analyses are likely to have the same behavior. Limiting DAG width is
+// the safe approach, and will be especially important with global DAGs.
+//
+// MaxParallelChains default is arbitrarily high to avoid affecting
+// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
+// sequence over this should have been converted to llvm.memcpy by the
+// frontend. It is easy to induce this behavior with .ll code such as:
+// %buffer = alloca [4096 x i8]
+// %data = load [4096 x i8]* %argPtr
+// store [4096 x i8] %data, [4096 x i8]* %buffer
+static cl::opt<unsigned>
+MaxParallelChains("dag-chain-limit", cl::desc("Max parallel isel dag chains"),
+ cl::init(64), cl::Hidden);
+
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts, unsigned NumParts,
EVT PartVT, EVT ValueVT);
-
+
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger than ValueVT then AssertOp can be used to specify whether the extra
@@ -85,7 +103,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
ISD::NodeType AssertOp = ISD::DELETED_NODE) {
if (ValueVT.isVector())
return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT);
-
+
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
@@ -112,8 +130,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
RoundParts / 2, PartVT, HalfVT);
} else {
- Lo = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[0]);
- Hi = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[1]);
+ Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
}
if (TLI.isBigEndian())
@@ -145,8 +163,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
"Unexpected split");
SDValue Lo, Hi;
- Lo = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[0]);
- Hi = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[1]);
+ Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
if (TLI.isBigEndian())
std::swap(Lo, Hi);
Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
@@ -188,7 +206,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
}
if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
- return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
llvm_unreachable("Unknown mismatch!");
return SDValue();
@@ -206,7 +224,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
-
+
// Handle a multi-element vector.
if (NumParts > 1) {
EVT IntermediateVT, RegisterVT;
@@ -219,7 +237,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
assert(RegisterVT == Parts[0].getValueType() &&
"Part type doesn't match part!");
-
+
// Assemble the parts into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
if (NumIntermediates == NumParts) {
@@ -238,20 +256,20 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
PartVT, IntermediateVT);
}
-
+
// Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
// intermediate operands.
Val = DAG.getNode(IntermediateVT.isVector() ?
ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL,
ValueVT, &Ops[0], NumIntermediates);
}
-
+
// There is now one part, held in Val. Correct it to match ValueVT.
PartVT = Val.getValueType();
-
+
if (PartVT == ValueVT)
return Val;
-
+
if (PartVT.isVector()) {
// If the element type of the source/dest vectors are the same, but the
// parts vector has more elements than the value vector, then we have a
@@ -262,12 +280,12 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
"Cannot narrow, it would be a lossy transformation");
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
DAG.getIntPtrConstant(0));
- }
-
+ }
+
// Vector/Vector bitcast.
- return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
}
-
+
assert(ValueVT.getVectorElementType() == PartVT &&
ValueVT.getVectorNumElements() == 1 &&
"Only trivial scalar-to-vector conversions should get here!");
@@ -280,7 +298,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
EVT PartVT);
-
+
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
@@ -289,11 +307,11 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
EVT PartVT,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
EVT ValueVT = Val.getValueType();
-
+
// Handle the vector case separately.
if (ValueVT.isVector())
return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT);
-
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned PartBits = PartVT.getSizeInBits();
unsigned OrigNumParts = NumParts;
@@ -316,14 +334,14 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
} else {
assert(PartVT.isInteger() && ValueVT.isInteger() &&
- "Unknown mismatch!");
+ "Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
}
} else if (PartBits == ValueVT.getSizeInBits()) {
// Different types of the same size.
assert(NumParts == 1 && PartVT != ValueVT);
- Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
// If the parts cover less bits than value has, truncate the value.
assert(PartVT.isInteger() && ValueVT.isInteger() &&
@@ -366,7 +384,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
// The number of parts is a power of 2. Repeatedly bisect the value using
// EXTRACT_ELEMENT.
- Parts[0] = DAG.getNode(ISD::BIT_CONVERT, DL,
+ Parts[0] = DAG.getNode(ISD::BITCAST, DL,
EVT::getIntegerVT(*DAG.getContext(),
ValueVT.getSizeInBits()),
Val);
@@ -384,8 +402,8 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
ThisVT, Part0, DAG.getIntPtrConstant(0));
if (ThisBits == PartBits && ThisVT != PartVT) {
- Part0 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part0);
- Part1 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part1);
+ Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
+ Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
}
}
}
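The EXTRACT_ELEMENT bisection above relies on the part count being a power of two: halve the value k times to produce 2^k parts. The same recursion over a plain 64-bit value (a sketch; little-endian part order assumed):

#include <cassert>
#include <cstdint>
#include <vector>

// Repeated bisection, as the EXTRACT_ELEMENT loop does: split a wide
// value into NumParts (a power of two) pieces by halving k times.
static void bisect(uint64_t Val, unsigned Bits, unsigned NumParts,
                   std::vector<uint64_t> &Parts) {
  if (NumParts == 1) {
    Parts.push_back(Val);
    return;
  }
  unsigned Half = Bits / 2;
  uint64_t Lo = Val & ((1ULL << Half) - 1); // EXTRACT_ELEMENT 0
  uint64_t Hi = Val >> Half;                // EXTRACT_ELEMENT 1
  bisect(Lo, Half, NumParts / 2, Parts);    // little-endian part order
  bisect(Hi, Half, NumParts / 2, Parts);
}

int main() {
  std::vector<uint64_t> Parts;
  bisect(0x1122334455667788ULL, 64, 4, Parts); // four 16-bit parts
  assert(Parts[0] == 0x7788 && Parts[3] == 0x1122);
  return 0;
}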
@@ -403,13 +421,13 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-
+
if (NumParts == 1) {
if (PartVT == ValueVT) {
// Nothing to do.
} else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
// Bitconvert vector->vector case.
- Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (PartVT.isVector() &&
PartVT.getVectorElementType() == ValueVT.getVectorElementType()&&
PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
@@ -420,7 +438,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
ElementVT, Val, DAG.getIntPtrConstant(i)));
-
+
for (unsigned i = ValueVT.getVectorNumElements(),
e = PartVT.getVectorNumElements(); i != e; ++i)
Ops.push_back(DAG.getUNDEF(ElementVT));
@@ -428,7 +446,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
// FIXME: Use CONCAT for 2x -> 4x.
-
+
//SDValue UndefElts = DAG.getUNDEF(VectorTy);
//Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
} else {
@@ -439,11 +457,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
PartVT, Val, DAG.getIntPtrConstant(0));
}
-
+
Parts[0] = Val;
return;
}
-
+
// Handle a multi-element vector.
EVT IntermediateVT, RegisterVT;
unsigned NumIntermediates;
@@ -451,11 +469,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
IntermediateVT,
NumIntermediates, RegisterVT);
unsigned NumElements = ValueVT.getVectorNumElements();
-
+
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
-
+
// Split the vector into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
for (unsigned i = 0; i != NumIntermediates; ++i) {
@@ -467,7 +485,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
IntermediateVT, Val, DAG.getIntPtrConstant(i));
}
-
+
// Split the intermediate operands into legal parts.
if (NumParts == NumIntermediates) {
// If the register was not expanded, promote or copy the value,
@@ -618,48 +636,49 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
}
Chain = P.getValue(1);
+ Parts[i] = P;
// If the source register was virtual and if we know something about it,
// add an assert node.
- if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
- RegisterVT.isInteger() && !RegisterVT.isVector()) {
- unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
- if (FuncInfo.LiveOutRegInfo.size() > SlotNo) {
- const FunctionLoweringInfo::LiveOutInfo &LOI =
- FuncInfo.LiveOutRegInfo[SlotNo];
-
- unsigned RegSize = RegisterVT.getSizeInBits();
- unsigned NumSignBits = LOI.NumSignBits;
- unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
-
- // FIXME: We capture more information than the dag can represent. For
- // now, just use the tightest assertzext/assertsext possible.
- bool isSExt = true;
- EVT FromVT(MVT::Other);
- if (NumSignBits == RegSize)
- isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
- else if (NumZeroBits >= RegSize-1)
- isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
- else if (NumSignBits > RegSize-8)
- isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
- else if (NumZeroBits >= RegSize-8)
- isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
- else if (NumSignBits > RegSize-16)
- isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
- else if (NumZeroBits >= RegSize-16)
- isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
- else if (NumSignBits > RegSize-32)
- isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
- else if (NumZeroBits >= RegSize-32)
- isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
-
- if (FromVT != MVT::Other)
- P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
- RegisterVT, P, DAG.getValueType(FromVT));
- }
- }
+ if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
+ !RegisterVT.isInteger() || RegisterVT.isVector() ||
+ !FuncInfo.LiveOutRegInfo.inBounds(Regs[Part+i]))
+ continue;
+
+ const FunctionLoweringInfo::LiveOutInfo &LOI =
+ FuncInfo.LiveOutRegInfo[Regs[Part+i]];
+
+ unsigned RegSize = RegisterVT.getSizeInBits();
+ unsigned NumSignBits = LOI.NumSignBits;
+ unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
+
+ // FIXME: We capture more information than the dag can represent. For
+ // now, just use the tightest assertzext/assertsext possible.
+ bool isSExt = true;
+ EVT FromVT(MVT::Other);
+ if (NumSignBits == RegSize)
+ isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
+ else if (NumZeroBits >= RegSize-1)
+ isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
+ else if (NumSignBits > RegSize-8)
+ isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
+ else if (NumZeroBits >= RegSize-8)
+ isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
+ else if (NumSignBits > RegSize-16)
+ isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
+ else if (NumZeroBits >= RegSize-16)
+ isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
+ else if (NumSignBits > RegSize-32)
+ isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
+ else if (NumZeroBits >= RegSize-32)
+ isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
+ else
+ continue;
- Parts[i] = P;
+ // Add an assertion node.
+ assert(FromVT != MVT::Other);
+ Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+ RegisterVT, P, DAG.getValueType(FromVT));
}
Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
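The early-continue restructuring keeps the selection ladder intact: given the known sign or zero bits of the live-out register, pick the narrowest type whose assertsext/assertzext still holds. The ladder in isolation, with a couple of worked cases:

#include <cassert>
#include <utility>

enum AssertKind { None, SExt, ZExt };

// The ladder above as a pure function: choose the tightest
// assertsext/assertzext width justified by the known sign/zero bits
// of a RegSize-bit register.
static std::pair<AssertKind, unsigned>
pickAssert(unsigned RegSize, unsigned NumSignBits, unsigned NumZeroBits) {
  if (NumSignBits == RegSize)      return std::make_pair(SExt, 1u);
  if (NumZeroBits >= RegSize - 1)  return std::make_pair(ZExt, 1u);
  if (NumSignBits > RegSize - 8)   return std::make_pair(SExt, 8u);
  if (NumZeroBits >= RegSize - 8)  return std::make_pair(ZExt, 8u);
  if (NumSignBits > RegSize - 16)  return std::make_pair(SExt, 16u);
  if (NumZeroBits >= RegSize - 16) return std::make_pair(ZExt, 16u);
  if (NumSignBits > RegSize - 32)  return std::make_pair(SExt, 32u);
  if (NumZeroBits >= RegSize - 32) return std::make_pair(ZExt, 32u);
  return std::make_pair(None, 0u);
}

int main() {
  // 64-bit vreg with 57 known sign bits: 57 > 64-8, AssertSext from i8.
  assert(pickAssert(64, 57, 0).second == 8);
  // 48 known leading zeros: 48 >= 64-16, AssertZext from i16.
  assert(pickAssert(64, 1, 48).first == ZExt);
  assert(pickAssert(64, 1, 48).second == 16);
  return 0;
}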
@@ -889,11 +908,8 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
Val.getResNo(), Offset, dl, DbgSDNodeOrder);
DAG.AddDbgValue(SDV, Val.getNode(), false);
}
- } else {
- SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
- Offset, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, 0, false);
- }
+ } else
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
DanglingDebugInfoMap[V] = DanglingDebugInfo();
}
}
@@ -913,7 +929,9 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
unsigned InReg = It->second;
RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
SDValue Chain = DAG.getEntryNode();
- return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL);
+ N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL);
+ resolveDanglingDebugInfo(V, N);
+ return N;
}
// Otherwise create a new SDValue and remember it.
@@ -1088,7 +1106,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
Chains[i] =
DAG.getStore(Chain, getCurDebugLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + i),
- Add, NULL, Offsets[i], false, false, 0);
+ // FIXME: better loc info would be nice.
+ Add, MachinePointerInfo(), false, false, 0);
}
Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
@@ -1347,7 +1366,7 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
return false;
}
-
+
return true;
}
@@ -1383,6 +1402,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// If this is a series of conditions that are or'd or and'd together, emit
// this as a sequence of branches instead of setcc's with and/or operations.
+ // As long as jumps are not expensive, this should improve performance.
// For example, instead of something like:
// cmp A, B
// C = seteq
@@ -1397,7 +1417,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// jle foo
//
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
- if (BOp->hasOneUse() &&
+ if (!TLI.isJumpExpensive() &&
+ BOp->hasOneUse() &&
(BOp->getOpcode() == Instruction::And ||
BOp->getOpcode() == Instruction::Or)) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
@@ -1502,10 +1523,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
MVT::Other, getControlRoot(), Cond,
DAG.getBasicBlock(CB.TrueBB));
- // Insert the false branch.
- if (CB.FalseBB != NextBlock)
- BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
- DAG.getBasicBlock(CB.FalseBB));
+ // Insert the false branch. Do this even if it's a fall through branch,
+ // this makes it easier to do DAG optimizations which require inverting
+ // the branch condition.
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(CB.FalseBB));
DAG.setRoot(BrCond);
}
@@ -1592,12 +1614,28 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
Sub, DAG.getConstant(B.Range, VT),
ISD::SETUGT);
- SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(),
- TLI.getPointerTy());
+ // Determine the type of the test operands.
+ bool UsePtrType = false;
+ if (!TLI.isTypeLegal(VT))
+ UsePtrType = true;
+ else {
+ for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
+ if ((uint64_t)((int64_t)B.Cases[i].Mask >> VT.getSizeInBits()) + 1 >= 2) {
+ // Switch table case ranges are encoded into a series of masks.
+ // Just use pointer type, it's guaranteed to fit.
+ UsePtrType = true;
+ break;
+ }
+ }
+ if (UsePtrType) {
+ VT = TLI.getPointerTy();
+ Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT);
+ }
- B.Reg = FuncInfo.CreateReg(TLI.getPointerTy());
+ B.RegVT = VT;
+ B.Reg = FuncInfo.CreateReg(VT);
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
- B.Reg, ShiftOp);
+ B.Reg, Sub);
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
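The guard deciding UsePtrType is dense enough to deserve unpacking: arithmetic-shifting the mask right by the type's width leaves 0 or -1 exactly when the mask sign-fits, so adding 1 and comparing against 2 flags precisely the masks with bits above the narrow type. A standalone restatement (assumes a width below 64 so the shift stays defined):

#include <cassert>
#include <cstdint>

// The guard above, isolated: does any mask bit lie above a Bits-wide
// value? 0 or -1 after the arithmetic shift means "fits".
static bool maskNeedsWiderType(uint64_t Mask, unsigned Bits) {
  return (uint64_t)((int64_t)Mask >> Bits) + 1 >= 2;
}

int main() {
  assert(!maskNeedsWiderType(0x00000000000000F0ULL, 32)); // fits i32
  assert(maskNeedsWiderType(0x0000010000000000ULL, 32));  // bit 40 set
  return 0;
}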
@@ -1623,36 +1661,34 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
}
/// visitBitTestCase - This function produces one "bit test" block.
-void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
+void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB) {
- SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
- TLI.getPointerTy());
+ EVT VT = BB.RegVT;
+ SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+ Reg, VT);
SDValue Cmp;
if (CountPopulation_64(B.Mask) == 1) {
// Testing for a single bit; just compare the shift count with what it
// would need to be to shift a 1 bit in that position.
Cmp = DAG.getSetCC(getCurDebugLoc(),
- TLI.getSetCCResultType(ShiftOp.getValueType()),
+ TLI.getSetCCResultType(VT),
ShiftOp,
- DAG.getConstant(CountTrailingZeros_64(B.Mask),
- TLI.getPointerTy()),
+ DAG.getConstant(CountTrailingZeros_64(B.Mask), VT),
ISD::SETEQ);
} else {
// Make desired shift
- SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
- TLI.getPointerTy(),
- DAG.getConstant(1, TLI.getPointerTy()),
- ShiftOp);
+ SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT,
+ DAG.getConstant(1, VT), ShiftOp);
// Emit bit tests and jumps
SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
- TLI.getPointerTy(), SwitchVal,
- DAG.getConstant(B.Mask, TLI.getPointerTy()));
+ VT, SwitchVal, DAG.getConstant(B.Mask, VT));
Cmp = DAG.getSetCC(getCurDebugLoc(),
- TLI.getSetCCResultType(AndOp.getValueType()),
- AndOp, DAG.getConstant(0, TLI.getPointerTy()),
+ TLI.getSetCCResultType(VT),
+ AndOp, DAG.getConstant(0, VT),
ISD::SETNE);
}
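Both arms of visitBitTestCase compute the same predicate; the popcount==1 path merely avoids materializing the shift. What one emitted bit-test block evaluates, in scalar form:

#include <cassert>
#include <cstdint>

// What one emitted bit-test block evaluates for a case mask, given the
// already range-checked, rebased switch value x.
static bool bitTestHits(uint64_t Mask, uint64_t x) {
  return ((1ULL << x) & Mask) != 0;  // the general SHL/AND/SETNE form
}

int main() {
  // Cases {0, 2, 5} sharing one destination: Mask = 0b100101.
  uint64_t Mask = (1ULL << 0) | (1ULL << 2) | (1ULL << 5);
  assert(bitTestHits(Mask, 2));
  assert(!bitTestHits(Mask, 3));
  // With a single-bit mask the test degenerates to x == position,
  // which is the cheaper SETEQ arm above.
  assert(bitTestHits(1ULL << 7, 7) && !bitTestHits(1ULL << 7, 6));
  return 0;
}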
@@ -1732,10 +1768,56 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
- // TODO: If any two of the cases has the same destination, and if one value
+ // If any two of the cases have the same destination, and if one value
// is the same as the other, but has one bit unset that the other has set,
// use bit manipulation to do two compares at once. For example:
// "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+ // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
+ // TODO: Handle cases where CR.CaseBB != SwitchBB.
+ if (Size == 2 && CR.CaseBB == SwitchBB) {
+ Case &Small = *CR.Range.first;
+ Case &Big = *(CR.Range.second-1);
+
+ if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
+ const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
+ const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();
+
+ // Check that there is only one bit different.
+ if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
+ (SmallValue | BigValue) == BigValue) {
+ // Isolate the common bit.
+ APInt CommonBit = BigValue & ~SmallValue;
+ assert((SmallValue | CommonBit) == BigValue &&
+ CommonBit.countPopulation() == 1 && "Not a common bit?");
+
+ SDValue CondLHS = getValue(SV);
+ EVT VT = CondLHS.getValueType();
+ DebugLoc DL = getCurDebugLoc();
+
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+ DAG.getConstant(CommonBit, VT));
+ SDValue Cond = DAG.getSetCC(DL, MVT::i1,
+ Or, DAG.getConstant(BigValue, VT),
+ ISD::SETEQ);
+
+ // Update successor info.
+ SwitchBB->addSuccessor(Small.BB);
+ SwitchBB->addSuccessor(Default);
+
+ // Insert the true branch.
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
+ getControlRoot(), Cond,
+ DAG.getBasicBlock(Small.BB));
+
+ // Insert the false branch.
+ BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
+ DAG.getBasicBlock(Default));
+
+ DAG.setRoot(BrCond);
+ return true;
+ }
+ }
+ }
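The arithmetic behind the merged compare: when BigValue has exactly one set bit that SmallValue lacks, OR-ing that bit into X collapses both equalities into one. Exhaustively checking the comment's own example:

#include <cassert>
#include <cstdint>

// "if (X == 6 || X == 4)" -> "if ((X | 2) == 6)": with 4 = 0b100 and
// 6 = 0b110, the single differing bit is OR-ed into X.
static bool mergedCompare(uint64_t X, uint64_t Small, uint64_t Big) {
  uint64_t CommonBit = Big & ~Small;  // exactly one bit, by the guard
  return (X | CommonBit) == Big;
}

int main() {
  for (uint64_t X = 0; X != 16; ++X)
    assert(mergedCompare(X, 4, 6) == (X == 4 || X == 6));
  return 0;
}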
// Rearrange the case blocks so that the last one falls through if possible.
if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
@@ -1800,9 +1882,8 @@ static inline bool areJTsAllowed(const TargetLowering &TLI) {
}
static APInt ComputeRange(const APInt &First, const APInt &Last) {
- APInt LastExt(Last), FirstExt(First);
uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
- LastExt.sext(BitWidth); FirstExt.sext(BitWidth);
+ APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
return (LastExt - FirstExt + 1ULL);
}
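Semantics are unchanged here; only APInt's value-returning sext is adopted. The one extra bit of width is what keeps the subtraction from wrapping when the endpoints span the whole signed range, e.g. i8 cases from -128 to 127:

#include <cassert>
#include <cstdint>

int main() {
  // i8 endpoints sign-extended one bit wider (int is plenty here).
  int First = -128, Last = 127;
  // In the wider type the case range comes out as all 256 values...
  assert(Last - First + 1 == 256);
  // ...whereas the unwidened 8-bit subtraction wraps to 0, which is
  // the overflow the extra bit avoids.
  assert((uint8_t)(Last - First + 1) == 0);
  return 0;
}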
@@ -2151,7 +2232,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
}
BitTestBlock BTB(lowBound, cmpRange, SV,
- -1U, (CR.CaseBB == SwitchBB),
+ -1U, MVT::Other, (CR.CaseBB == SwitchBB),
CR.CaseBB, Default, BTC);
if (CR.CaseBB == SwitchBB)
@@ -2180,7 +2261,8 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
if (Cases.size() >= 2)
// Must recompute end() each iteration because it may be
// invalidated by erase if we hold on to it
- for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
+ for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
+ J != Cases.end(); ) {
const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
MachineBasicBlock* nextBB = J->BB;
@@ -2205,6 +2287,19 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
return numCmps;
}
+void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
+ MachineBasicBlock *Last) {
+ // Update JTCases.
+ for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
+ if (JTCases[i].first.HeaderBB == First)
+ JTCases[i].first.HeaderBB = Last;
+
+ // Update BitTestCases.
+ for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
+ if (BitTestCases[i].Parent == First)
+ BitTestCases[i].Parent = Last;
+}
+
void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
@@ -2292,30 +2387,14 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
void SelectionDAGBuilder::visitFSub(const User &I) {
// -0.0 - X --> fneg
const Type *Ty = I.getType();
- if (Ty->isVectorTy()) {
- if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
- const VectorType *DestTy = cast<VectorType>(I.getType());
- const Type *ElTy = DestTy->getElementType();
- unsigned VL = DestTy->getNumElements();
- std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
- Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
- if (CV == CNZ) {
- SDValue Op2 = getValue(I.getOperand(1));
- setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
- Op2.getValueType(), Op2));
- return;
- }
- }
+ if (isa<Constant>(I.getOperand(0)) &&
+ I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+ Op2.getValueType(), Op2));
+ return;
}
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
- if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
- SDValue Op2 = getValue(I.getOperand(1));
- setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
- Op2.getValueType(), Op2));
- return;
- }
-
visitBinary(I, ISD::FSUB);
}
@@ -2329,31 +2408,29 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
- if (!I.getType()->isVectorTy() &&
- Op2.getValueType() != TLI.getShiftAmountTy()) {
+
+ MVT ShiftTy = TLI.getShiftAmountTy();
+
+ // Coerce the shift amount to the right type if we can.
+ if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
+ unsigned ShiftSize = ShiftTy.getSizeInBits();
+ unsigned Op2Size = Op2.getValueType().getSizeInBits();
+ DebugLoc DL = getCurDebugLoc();
+
// If the operand is smaller than the shift count type, promote it.
- EVT PTy = TLI.getPointerTy();
- EVT STy = TLI.getShiftAmountTy();
- if (STy.bitsGT(Op2.getValueType()))
- Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
- TLI.getShiftAmountTy(), Op2);
+ if (ShiftSize > Op2Size)
+ Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
+
// If the operand is larger than the shift count type but the shift
// count type has enough bits to represent any shift value, truncate
// it now. This is a common case and it exposes the truncate to
// optimization early.
- else if (STy.getSizeInBits() >=
- Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
- Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
- TLI.getShiftAmountTy(), Op2);
- // Otherwise we'll need to temporarily settle for some other
- // convenient type; type legalization will make adjustments as
- // needed.
- else if (PTy.bitsLT(Op2.getValueType()))
- Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
- TLI.getPointerTy(), Op2);
- else if (PTy.bitsGT(Op2.getValueType()))
- Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
- TLI.getPointerTy(), Op2);
+ else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+ Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
+ // Otherwise we'll need to temporarily settle for some other convenient
+ // type. Type legalization will make adjustments once the shiftee is split.
+ else
+ Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
}
setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
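The new coercion policy in visitShift, restated as a standalone decision function (hypothetical widths, no target assumed): zero-extend narrow amounts, truncate early when the shift type can still represent every valid amount, and otherwise punt to i32 for type legalization to fix up.

#include <cassert>

// Bits needed to index any value in [0, N): a Log2_32_Ceil stand-in.
static unsigned log2Ceil(unsigned N) {
  unsigned Bits = 0;
  while ((1u << Bits) < N) ++Bits;
  return Bits;
}

enum Coercion { ZExtToShiftTy, TruncToShiftTy, ZExtOrTruncToI32 };

// Decision mirroring the block above: ShiftSize is the width of the
// target's shift-amount type, Op2Size the width of the IR amount.
static Coercion coerceShiftAmount(unsigned ShiftSize, unsigned Op2Size) {
  if (ShiftSize > Op2Size)
    return ZExtToShiftTy;          // promote narrow amounts (zext now)
  if (ShiftSize >= log2Ceil(Op2Size))
    return TruncToShiftTy;         // every valid amount still fits
  return ZExtOrTruncToI32;         // let type legalization adjust later
}

int main() {
  // i8 shift-amount type, i64 operand: 8 >= log2ceil(64) = 6, so
  // truncating early is safe and exposes the truncate to combines.
  assert(coerceShiftAmount(8, 64) == TruncToShiftTy);
  assert(coerceShiftAmount(32, 8) == ZExtToShiftTy);
  return 0;
}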
@@ -2499,9 +2576,9 @@ void SelectionDAGBuilder::visitBitCast(const User &I) {
EVT DestVT = TLI.getValueType(I.getType());
// BitCast assures us that source and destination are the same size so this is
- // either a BIT_CONVERT or a no-op.
+ // either a BITCAST or a no-op.
if (DestVT != N.getValueType())
- setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
DestVT, N)); // convert types.
else
setValue(&I, N); // noop cast.
@@ -2650,7 +2727,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
} else {
StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
- StartIdx[Input] + MaskNumElts < SrcNumElts)
+ StartIdx[Input] + MaskNumElts <= SrcNumElts)
RangeUse[Input] = 1; // Extract from a multiple of the mask length.
}
}
@@ -2726,8 +2803,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
bool IntoUndef = isa<UndefValue>(Op0);
bool FromUndef = isa<UndefValue>(Op1);
- unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
- I.idx_begin(), I.idx_end());
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());
SmallVector<EVT, 4> AggValueVTs;
ComputeValueVTs(TLI, AggTy, AggValueVTs);
@@ -2765,8 +2841,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
const Type *ValTy = I.getType();
bool OutOfUndef = isa<UndefValue>(Op0);
- unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
- I.idx_begin(), I.idx_end());
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());
SmallVector<EVT, 4> ValValueVTs;
ComputeValueVTs(TLI, ValTy, ValValueVTs);
@@ -2884,7 +2959,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If the requested alignment is greater
// than the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
- unsigned StackAlign = TM.getFrameInfo()->getStackAlignment();
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
if (Align <= StackAlign)
Align = 0;
@@ -2920,6 +2995,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata("nontemporal") != 0;
unsigned Alignment = I.getAlignment();
+ const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
@@ -2930,10 +3006,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
SDValue Root;
bool ConstantMemory = false;
- if (I.isVolatile())
+ if (I.isVolatile() || NumValues > MaxParallelChains)
// Serialize volatile loads with other side effects.
Root = getRoot();
- else if (AA->pointsToConstantMemory(SV)) {
+ else if (AA->pointsToConstantMemory(
+ AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -2943,23 +3020,38 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
}
SmallVector<SDValue, 4> Values(NumValues);
- SmallVector<SDValue, 4> Chains(NumValues);
+ SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+ NumValues));
EVT PtrVT = Ptr.getValueType();
- for (unsigned i = 0; i != NumValues; ++i) {
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // Serializing loads here may result in excessive register pressure, and
+ // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
+ // could recover a bit by hoisting nodes upward in the chain by recognizing
+ // they are side-effect free or do not alias. The optimizer should really
+ // avoid this case by converting large object/array copies to llvm.memcpy
+ // (MaxParallelChains should always remain as a failsafe).
+ if (ChainI == MaxParallelChains) {
+ assert(PendingLoads.empty() && "PendingLoads must be serialized first");
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ Root = Chain;
+ ChainI = 0;
+ }
SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
PtrVT, Ptr,
DAG.getConstant(Offsets[i], PtrVT));
SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
- A, SV, Offsets[i], isVolatile,
- isNonTemporal, Alignment);
+ A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
+ isNonTemporal, Alignment, TBAAInfo);
Values[i] = L;
- Chains[i] = L.getValue(1);
+ Chains[ChainI] = L.getValue(1);
}
if (!ConstantMemory) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
- MVT::Other, &Chains[0], NumValues);
+ MVT::Other, &Chains[0], ChainI);
if (isVolatile)
DAG.setRoot(Chain);
else
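The ChainI counter is the MaxParallelChains cap from earlier in action: rather than one TokenFactor over thousands of parallel loads, chains are flushed in fixed-size batches, each batch's TokenFactor becoming the root the next batch hangs from. The buffering pattern in isolation (toy Chain type standing in for SDValue chains):

#include <cassert>
#include <vector>

typedef int Chain;                  // toy stand-in for an SDValue chain
static const unsigned MaxParallelChains = 64;

// Stand-in for DAG.getNode(ISD::TokenFactor, ...): merge pending chains.
static Chain tokenFactor(const std::vector<Chain> &Chains) {
  return Chains.empty() ? 0 : Chains.back() + 1;
}

int main() {
  const unsigned NumValues = 200;   // e.g. a 200-element aggregate load
  std::vector<Chain> Chains;
  Chain Root = 0;
  unsigned Flushes = 0;
  for (unsigned i = 0; i != NumValues; ++i) {
    if (Chains.size() == MaxParallelChains) {
      Root = tokenFactor(Chains);   // cap hit: serialize this batch
      Chains.clear();
      ++Flushes;
    }
    Chains.push_back(Root + 1);     // each load hangs off the current root
  }
  Root = tokenFactor(Chains);       // final TokenFactor over the rest
  assert(Flushes == 3 && Chains.size() == 8); // 200 = 3*64 + 8
  return 0;
}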
@@ -2989,23 +3081,37 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SDValue Ptr = getValue(PtrV);
SDValue Root = getRoot();
- SmallVector<SDValue, 4> Chains(NumValues);
+ SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+ NumValues));
EVT PtrVT = Ptr.getValueType();
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata("nontemporal") != 0;
unsigned Alignment = I.getAlignment();
-
- for (unsigned i = 0; i != NumValues; ++i) {
+ const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // See visitLoad comments.
+ if (ChainI == MaxParallelChains) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ Root = Chain;
+ ChainI = 0;
+ }
SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
DAG.getConstant(Offsets[i], PtrVT));
- Chains[i] = DAG.getStore(Root, getCurDebugLoc(),
- SDValue(Src.getNode(), Src.getResNo() + i),
- Add, PtrV, Offsets[i], isVolatile,
- isNonTemporal, Alignment);
- }
-
- DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
- MVT::Other, &Chains[0], NumValues));
+ SDValue St = DAG.getStore(Root, getCurDebugLoc(),
+ SDValue(Src.getNode(), Src.getResNo() + i),
+ Add, MachinePointerInfo(PtrV, Offsets[i]),
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ Chains[ChainI] = St;
+ }
+
+ SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ ++SDNodeOrder;
+ AssignOrderingToNode(StoreNode.getNode());
+ DAG.setRoot(StoreNode);
}
/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
@@ -3031,7 +3137,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
- if (!IsTgtIntrinsic)
+ if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
+ Info.opc == ISD::INTRINSIC_W_CHAIN)
Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
// Add all operands of the call to the operand list.
@@ -3062,7 +3169,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// This is target intrinsic that touches memory
Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
VTs, &Ops[0], Ops.size(),
- Info.memVT, Info.ptrVal, Info.offset,
+ Info.memVT,
+ MachinePointerInfo(Info.ptrVal, Info.offset),
Info.align, Info.vol,
Info.readMem, Info.writeMem);
} else if (!HasChain) {
@@ -3087,7 +3195,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
if (!I.getType()->isVoidTy()) {
if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
EVT VT = TLI.getValueType(PTy);
- Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
+ Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result);
}
setValue(&I, Result);
@@ -3106,7 +3214,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
DAG.getConstant(0x007fffff, MVT::i32));
SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
DAG.getConstant(0x3f800000, MVT::i32));
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
}
/// GetExponent - Get the exponent:
@@ -3205,13 +3313,13 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f7f5e7e));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5);
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t5);
// Add the exponent into the result in integer domain.
SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
TwoToFracPartOfX, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6);
+ result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6);
} else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
@@ -3231,13 +3339,13 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3f7ff8fd));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7);
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7);
// Add the exponent into the result in integer domain.
SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
TwoToFracPartOfX, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8);
+ result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8);
} else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
@@ -3269,14 +3377,14 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
getF32Constant(DAG, 0x3f800000));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,
MVT::i32, t13);
// Add the exponent into the result in integer domain.
SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
TwoToFracPartOfX, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14);
+ result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14);
}
} else {
// No special expansion.
@@ -3298,7 +3406,7 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op = getValue(I.getArgOperand(0));
- SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log(2) [0.69314718f].
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
@@ -3408,7 +3516,7 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op = getValue(I.getArgOperand(0));
- SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Get the exponent.
SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
@@ -3517,7 +3625,7 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op = getValue(I.getArgOperand(0));
- SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log10(2) [0.30102999f].
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
@@ -3645,11 +3753,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f7f5e7e));
- SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+ SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
} else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
@@ -3670,11 +3778,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3f7ff8fd));
- SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+ SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
} else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
// For floating-point precision of 18:
@@ -3706,11 +3814,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
getF32Constant(DAG, 0x3f800000));
- SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+ SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
}
} else {
@@ -3778,11 +3886,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f7f5e7e));
- SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+ SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
} else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
@@ -3803,11 +3911,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3f7ff8fd));
- SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+ SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
} else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
// For floating-point precision of 18:
@@ -3839,11 +3947,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
getF32Constant(DAG, 0x3f800000));
- SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+ SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
}
} else {
@@ -3915,13 +4023,16 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
/// At the end of instruction selection, they will be inserted to the entry BB.
bool
SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
- int64_t Offset,
+ int64_t Offset,
const SDValue &N) {
const Argument *Arg = dyn_cast<Argument>(V);
if (!Arg)
return false;
MachineFunction &MF = DAG.getMachineFunction();
+ const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+
// Ignore inlined function arguments here.
DIVariable DV(Variable);
if (DV.isInlinedFnArgument(MF.getFunction()))
@@ -3935,14 +4046,16 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
if (Arg->hasByValAttr()) {
// Byval arguments' frame index is recorded during argument lowering.
// Use this info directly.
- const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
Reg = TRI->getFrameRegister(MF);
Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
+ // If the byval argument offset is not recorded then ignore this.
+ if (!Offset)
+ Reg = 0;
}
if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) {
Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
- if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
unsigned PR = RegInfo.getLiveInPhysReg(Reg);
if (PR)
@@ -3951,13 +4064,25 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
}
if (!Reg) {
+ // Check if the ValueMap has a register number.
DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
- if (VMI == FuncInfo.ValueMap.end())
- return false;
- Reg = VMI->second;
+ if (VMI != FuncInfo.ValueMap.end())
+ Reg = VMI->second;
}
- const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
+ if (!Reg && N.getNode()) {
+ // Check if frame index is available.
+ if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+ if (FrameIndexSDNode *FINode =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) {
+ Reg = TRI->getFrameRegister(MF);
+ Offset = FINode->getIndex();
+ }
+ }
+
+ if (!Reg)
+ return false;
+
MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(),
TII->get(TargetOpcode::DBG_VALUE))
.addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable);
@@ -3966,9 +4091,11 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
}
// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp)
-#define setjmp_undefined_for_visual_studio
-#undef setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
#endif
/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
@@ -4013,7 +4140,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
- I.getArgOperand(0), 0, I.getArgOperand(1), 0));
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1))));
return 0;
}
case Intrinsic::memset: {
@@ -4028,7 +4156,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
- I.getArgOperand(0), 0));
+ MachinePointerInfo(I.getArgOperand(0))));
return 0;
}
case Intrinsic::memmove: {
@@ -4044,22 +4172,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Op3 = getValue(I.getArgOperand(2));
unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
-
- // If the source and destination are known to not be aliases, we can
- // lower memmove as memcpy.
- uint64_t Size = -1ULL;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
- Size = C->getZExtValue();
- if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) ==
- AliasAnalysis::NoAlias) {
- DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
- false, I.getArgOperand(0), 0,
- I.getArgOperand(1), 0));
- return 0;
- }
-
DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
- I.getArgOperand(0), 0, I.getArgOperand(1), 0));
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1))));
return 0;
}
case Intrinsic::dbg_declare: {
@@ -4078,10 +4193,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Check if address has undef value.
if (isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
- SDDbgValue*SDV =
- DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
- 0, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, 0, false);
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
return 0;
}
@@ -4092,7 +4204,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDDbgValue *SDV;
if (N.getNode()) {
// Parameters are handled specially.
- bool isParameter =
+ bool isParameter =
DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
@@ -4104,25 +4216,40 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Byval parameter. We have a frame index at this point.
SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
0, dl, SDNodeOrder);
- else
+ else {
// Can't do anything with other non-AI cases yet. This might be a
// parameter of a callee function that got inlined, for example.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
return 0;
+ }
} else if (AI)
SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
0, dl, SDNodeOrder);
- else
+ else {
// Can't do anything with other non-AI cases yet.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
return 0;
+ }
DAG.AddDbgValue(SDV, N.getNode(), isParameter);
} else {
- // If Address is an arugment then try to emits its dbg value using
- // virtual register info from the FuncInfo.ValueMap. Otherwise add undef
- // to help track missing debug info.
+ // If Address is an argument then try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
- SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
- 0, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, 0, false);
+ // If the variable is pinned by an alloca in a dominating bb then
+ // use StaticAllocaMap.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+ if (AI->getParent() != DI.getParent()) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ SDV = DAG.getDbgValue(Variable, SI->second,
+ 0, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ return 0;
+ }
+ }
+ }
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
}
}
return 0;
@@ -4160,17 +4287,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
N.getResNo(), Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, N.getNode(), false);
}
- } else if (isa<PHINode>(V) && !V->use_empty() ) {
+ } else if (!V->use_empty() ) {
// Do not call getValue(V) yet, as we don't want to generate code.
// Remember it for later.
DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
DanglingDebugInfoMap[V] = DDI;
} else {
// We may expand this to cover more cases. One case where we have no
- // data available is an unreferenced parameter; we need this fallback.
- SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
- Offset, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, 0, false);
+ // data available is an unreferenced parameter.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
}
}
@@ -4186,7 +4311,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (SI == FuncInfo.StaticAllocaMap.end())
return 0; // VLAs.
int FI = SI->second;
-
+
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
@@ -4282,11 +4407,75 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::eh_sjlj_longjmp: {
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
- getRoot(),
- getValue(I.getArgOperand(0))));
+ getRoot(), getValue(I.getArgOperand(0))));
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_dispatch_setup: {
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
+ getRoot(), getValue(I.getArgOperand(0))));
return 0;
}
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d: {
+ SDValue ShAmt = getValue(I.getArgOperand(1));
+ if (isa<ConstantSDNode>(ShAmt)) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return 0;
+ }
+ unsigned NewIntrinsic = 0;
+ EVT ShAmtVT = MVT::v2i32;
+ switch (Intrinsic) {
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_w;
+ break;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_d;
+ break;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_q;
+ break;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
+ break;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
+ break;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
+ break;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_w;
+ break;
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_d;
+ break;
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ }
+
+    // The vector shift intrinsics with scalars use 32-bit shift amounts but
+    // the sse2/mmx shift instructions read 64 bits. Set the upper 32 bits
+ // to be zero.
+ // We must do this early because v2i32 is not a legal type.
+ DebugLoc dl = getCurDebugLoc();
+ SDValue ShOps[2];
+ ShOps[0] = ShAmt;
+ ShOps[1] = DAG.getConstant(0, MVT::i32);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+ EVT DestVT = TLI.getValueType(I.getType());
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt);
+ Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(NewIntrinsic, MVT::i32),
+ getValue(I.getArgOperand(0)), ShAmt);
+ setValue(&I, Res);
+ return 0;
+ }
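
The same widening is visible at the C intrinsic level: when the shift count is not a compile-time constant, the immediate (pslli) forms cannot be used and the count must travel in a 64-bit MMX register with its upper half cleared. A minimal sketch of the equivalent source-level pattern (the function name is illustrative):

    #include <mmintrin.h>

    // Variable count: use the register form (psllw), mirroring the
    // BUILD_VECTOR {count, 0} + BITCAST sequence constructed above.
    static __m64 shift_words_left(__m64 v, int count) {
      __m64 amt = _mm_cvtsi32_si64(count); // upper 32 bits are zero
      return _mm_sll_pi16(v, amt);         // psllw with 64-bit count
    }
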
case Intrinsic::convertff:
case Intrinsic::convertfsi:
case Intrinsic::convertfui:
@@ -4430,8 +4619,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Store the stack protector onto the stack.
Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
- PseudoSourceValue::getFixedStack(FI),
- 0, true, false, 0);
+ MachinePointerInfo::getFixedStack(FI),
+ true, false, 0);
setValue(&I, Res);
DAG.setRoot(Res);
return 0;
@@ -4510,14 +4699,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::prefetch: {
SDValue Ops[4];
+ unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
Ops[0] = getRoot();
Ops[1] = getValue(I.getArgOperand(0));
Ops[2] = getValue(I.getArgOperand(1));
Ops[3] = getValue(I.getArgOperand(2));
- DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4));
+ DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
+ DAG.getVTList(MVT::Other),
+ &Ops[0], 4,
+ EVT::getIntegerVT(*Context, 8),
+ MachinePointerInfo(I.getArgOperand(0)),
+ 0, /* align */
+ false, /* volatile */
+ rw==0, /* read */
+ rw==1)); /* write */
return 0;
}
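
At the source level, the rw operand decoded above comes straight from __builtin_prefetch, whose second argument selects a read (0) or write (1) prefetch; a short usage sketch:

    static void warm(const char *p) {
      __builtin_prefetch(p,      0 /* read  */, 3 /* high locality */);
      __builtin_prefetch(p + 64, 1 /* write */, 3);
    }
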
-
case Intrinsic::memory_barrier: {
SDValue Ops[6];
Ops[0] = getRoot();
@@ -4536,7 +4733,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2)),
- I.getArgOperand(0));
+ MachinePointerInfo(I.getArgOperand(0)));
setValue(&I, L);
DAG.setRoot(L.getValue(1));
return 0;
@@ -4599,6 +4796,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
FTy->isVarArg(), Outs, FTy->getContext());
SDValue DemoteStackSlot;
+ int DemoteStackIdx = -100;
if (!CanLowerReturn) {
uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
@@ -4606,10 +4804,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(
FTy->getReturnType());
MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
- DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy());
Entry.Node = DemoteStackSlot;
Entry.Ty = StackSlotPtrType;
Entry.isSExt = false;
@@ -4703,7 +4901,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
DemoteStackSlot,
DAG.getConstant(Offsets[i], PtrVT));
SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
- Add, NULL, Offsets[i], false, false, 1);
+ Add,
+ MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
+ false, false, 1);
Values[i] = L;
Chains[i] = L.getValue(1);
}
@@ -4711,7 +4911,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
MVT::Other, &Chains[0], NumValues);
PendingLoads.push_back(Chain);
-
+
// Collect the legal value parts into potentially illegal values
// that correspond to the original function's return values.
SmallVector<EVT, 4> RetTys;
@@ -4724,7 +4924,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
EVT VT = RetTys[I];
EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT);
unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
-
+
SDValue ReturnValue =
getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs,
RegisterVT, VT, AssertOp);
@@ -4806,7 +5006,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
SDValue Ptr = Builder.getValue(PtrVal);
SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
- Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/,
+ Ptr, MachinePointerInfo(PtrVal),
false /*volatile*/,
false /*nontemporal*/, 1 /* align=1 */);
@@ -4902,7 +5102,25 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
visitInlineAsm(&I);
return;
}
-
+
+ // See if any floating point values are being passed to this function. This is
+ // used to emit an undefined reference to fltused on Windows.
+ const FunctionType *FT =
+ cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ if (FT->isVarArg() &&
+ !MMI.callsExternalVAFunctionWithFloatingPointArguments()) {
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ const Type* T = I.getArgOperand(i)->getType();
+ for (po_iterator<const Type*> i = po_begin(T), e = po_end(T);
+ i != e; ++i) {
+ if (!i->isFloatingPointTy()) continue;
+ MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true);
+ break;
+ }
+ }
+ }
+
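
The scan above exists because the Windows CRT links in its floating-point support only when an object references _fltused, and passing a floating-point value to a variadic callee is one trigger. A minimal reproducer (the _fltused behavior is MSVC-CRT specific):

    #include <cstdio>

    int main() {
      std::printf("%f\n", 3.14); // double passed to a vararg callee;
      return 0;                  // the object must reference _fltused
    }
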
const char *RenameFn = 0;
if (Function *F = I.getCalledFunction()) {
if (F->isDeclaration()) {
@@ -4980,7 +5198,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
}
}
}
-
+
SDValue Callee;
if (!RenameFn)
Callee = getValue(I.getCalledValue());
@@ -5008,7 +5226,7 @@ public:
  /// contains the set of registers corresponding to the operand.
RegsForValue AssignedRegs;
- explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+ explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
: TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
}
@@ -5083,6 +5301,8 @@ private:
}
};
+typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
+
} // end llvm namespace.
/// isAllocatableRegister - If the specified register is safe to allocate,
@@ -5192,7 +5412,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
// vector types).
EVT RegVT = *PhysReg.second->vt_begin();
if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
- OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
} else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
@@ -5202,7 +5422,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
// machine.
RegVT = EVT::getIntegerVT(Context,
OpInfo.ConstraintVT.getSizeInBits());
- OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
}
@@ -5320,30 +5540,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
/// ConstraintOperands - Information about all of the constraints.
- std::vector<SDISelAsmOperandInfo> ConstraintOperands;
+ SDISelAsmOperandInfoVector ConstraintOperands;
std::set<unsigned> OutputRegs, InputRegs;
- // Do a prepass over the constraints, canonicalizing them, and building up the
- // ConstraintOperands list.
- std::vector<InlineAsm::ConstraintInfo>
- ConstraintInfos = IA->ParseConstraints();
-
- bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI);
-
- SDValue Chain, Flag;
-
- // We won't need to flush pending loads if this asm doesn't touch
- // memory and is nonvolatile.
- if (hasMemory || IA->hasSideEffects())
- Chain = getRoot();
- else
- Chain = DAG.getRoot();
+ TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS);
+ bool hasMemory = false;
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
unsigned ResNo = 0; // ResNo - The result number of the next output.
- for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
- ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i]));
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
EVT OpVT = MVT::Other;
@@ -5380,9 +5587,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// If this is an input or an indirect output, process the call argument.
// BasicBlocks are labels, currently appearing only in asm's.
if (OpInfo.CallOperandVal) {
- // Strip bitcasts, if any. This mostly comes up for functions.
- OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts();
-
if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
} else {
@@ -5393,11 +5597,33 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
OpInfo.ConstraintVT = OpVT;
+
+      // Indirect operands access memory.
+ if (OpInfo.isIndirect)
+ hasMemory = true;
+ else {
+ for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
+ TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[j]);
+ if (CType == TargetLowering::C_Memory) {
+ hasMemory = true;
+ break;
+ }
+ }
+ }
}
+ SDValue Chain, Flag;
+
+ // We won't need to flush pending loads if this asm doesn't touch
+ // memory and is nonvolatile.
+ if (hasMemory || IA->hasSideEffects())
+ Chain = getRoot();
+ else
+ Chain = DAG.getRoot();
+
// Second pass over the constraints: compute which constraint option to use
// and assign registers to constraints that want a specific physreg.
- for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
// If this is an output operand with a matching input operand, look up the
@@ -5406,7 +5632,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// error.
if (OpInfo.hasMatchingInput()) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
-
+
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
@@ -5427,7 +5653,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
   // need to provide an address for the memory input.
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
!OpInfo.isIndirect) {
- assert(OpInfo.Type == InlineAsm::isInput &&
+ assert((OpInfo.isMultipleAlternative || (OpInfo.Type == InlineAsm::isInput)) &&
"Can only indirectify direct input operands!");
// Memory operands really want the address of the value. If we don't have
@@ -5451,7 +5677,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
Chain = DAG.getStore(Chain, getCurDebugLoc(),
- OpInfo.CallOperand, StackSlot, NULL, 0,
+ OpInfo.CallOperand, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
false, false, 0);
OpInfo.CallOperand = StackSlot;
}
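
The stack-slot store above is what lets a direct operand satisfy an "m" constraint: memory operands need an address. A GCC-style inline asm sketch that forces this path when x otherwise lives in a register:

    static int probe(int x) {
      int r;
      // "m" demands a memory operand; if x has no home slot yet, the
      // compiler spills it, just as the code above does explicitly.
      asm("movl %1, %0" : "=r"(r) : "m"(x));
      return r;
    }
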
@@ -5469,8 +5696,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
}
- ConstraintInfos.clear();
-
// Second pass - Loop over all of the operands, assigning virtual or physregs
// to register class operands.
for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
@@ -5495,9 +5720,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
- // Remember the AlignStack bit as operand 3.
- AsmNodeOperands.push_back(DAG.getTargetConstant(IA->isAlignStack() ? 1 : 0,
- MVT::i1));
+ // Remember the HasSideEffect and AlignStack bits as operand 3.
+ unsigned ExtraInfo = 0;
+ if (IA->hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
+ TLI.getPointerTy()));
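
Operand 3 of the INLINEASM node now packs two independent bits rather than a lone align-stack boolean. A decode sketch; the flag values shown are my reading of the InlineAsm::Extra_* constants and should be treated as assumptions:

    enum ExtraFlags {                  // assumed values of the
      Extra_HasSideEffects = 1,        // InlineAsm::Extra_* constants
      Extra_IsAlignStack   = 2
    };

    static bool hasSideEffects(unsigned extra) {
      return (extra & Extra_HasSideEffects) != 0;
    }
    static bool alignsStack(unsigned extra) {
      return (extra & Extra_IsAlignStack) != 0;
    }
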
// Loop over all of the inputs, copying the operand values into the
// appropriate registers and processing the output regs.
@@ -5588,7 +5818,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
" don't know how to handle tied "
"indirect register inputs");
}
-
+
RegsForValue MatchedRegs;
MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
@@ -5607,7 +5837,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
DAG, AsmNodeOperands);
break;
}
-
+
assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
"Unexpected number of operands");
@@ -5622,8 +5852,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
// Treat indirect 'X' constraint as memory.
- if (OpInfo.ConstraintType == TargetLowering::C_Other &&
- OpInfo.isIndirect)
+ if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ OpInfo.isIndirect)
OpInfo.ConstraintType = TargetLowering::C_Memory;
if (OpInfo.ConstraintType == TargetLowering::C_Other) {
@@ -5642,7 +5872,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
break;
}
-
+
if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
@@ -5693,7 +5923,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
- DAG.getVTList(MVT::Other, MVT::Flag),
+ DAG.getVTList(MVT::Other, MVT::Glue),
&AsmNodeOperands[0], AsmNodeOperands.size());
Flag = Chain.getValue(1);
@@ -5713,7 +5943,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// not have the same VT as was expected. Convert it to the right type
// with bit_convert.
if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
- Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
ResultType, Val);
} else if (ResultType != Val.getValueType() &&
@@ -5751,7 +5981,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),
StoresToEmit[i].first,
getValue(StoresToEmit[i].second),
- StoresToEmit[i].second, 0,
+ MachinePointerInfo(StoresToEmit[i].second),
false, false, 0);
OutChains.push_back(Val);
}
@@ -5888,7 +6118,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
- MyFlags.VT = RegisterVT;
+ MyFlags.VT = RegisterVT.getSimpleVT();
MyFlags.Used = isReturnValueUsed;
if (RetSExt)
MyFlags.Flags.setSExt();
@@ -5924,7 +6154,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
assert(InVals[i].getNode() &&
"LowerCall emitted a null value!");
- assert(Ins[i].VT == InVals[i].getValueType() &&
+ assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
"LowerCall emitted a value with the wrong type!");
});
@@ -6085,7 +6315,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
assert(InVals[i].getNode() &&
"LowerFormalArguments emitted a null value!");
- assert(Ins[i].VT == InVals[i].getValueType() &&
+ assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
"LowerFormalArguments emitted a value with the wrong type!");
}
});
@@ -6154,7 +6384,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// Note down frame index for byval arguments.
if (I->hasByValAttr() && !ArgValues.empty())
- if (FrameIndexSDNode *FI =
+ if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex());
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 5f400e9..a1a70c3 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -258,15 +258,16 @@ private:
struct BitTestBlock {
BitTestBlock(APInt F, APInt R, const Value* SV,
- unsigned Rg, bool E,
+ unsigned Rg, EVT RgVT, bool E,
MachineBasicBlock* P, MachineBasicBlock* D,
const BitTestInfo& C):
- First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E),
+ First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
Parent(P), Default(D), Cases(C) { }
APInt First;
APInt Range;
const Value *SValue;
unsigned Reg;
+ EVT RegVT;
bool Emitted;
MachineBasicBlock *Parent;
MachineBasicBlock *Default;
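
The new RegVT field records the type of the subtracted switch value so bit-test emission can pick the right register width. The transformation itself replaces a dense cluster of cases with one range check plus mask tests, conceptually:

    static bool isVowel(unsigned c) {
      unsigned idx = c - 'a';            // 'a' plays the role of First
      if (idx > 'u' - 'a') return false; // Range check
      return (0x104111u >> idx) & 1;     // one mask bit per matching case
    }
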
@@ -347,7 +348,7 @@ public:
SDValue getControlRoot();
DebugLoc getCurDebugLoc() const { return CurDebugLoc; }
-
+ void setCurDebugLoc(DebugLoc dl){ CurDebugLoc = dl; }
unsigned getSDNodeOrder() const { return SDNodeOrder; }
void CopyValueToVirtualRegister(const Value *V, unsigned Reg);
@@ -398,6 +399,10 @@ public:
void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
MachineBasicBlock *LandingPad = NULL);
+ /// UpdateSplitBlock - When an MBB was split during scheduling, update the
+  /// references that need to refer to the last resulting block.
+ void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
+
private:
// Terminator instructions.
void visitRet(const ReturnInst &I);
@@ -431,7 +436,8 @@ public:
void visitSwitchCase(CaseBlock &CB,
MachineBasicBlock *SwitchBB);
void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
- void visitBitTestCase(MachineBasicBlock* NextMBB,
+ void visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 66cb5ce..62ebc81 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -43,6 +43,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -53,8 +54,17 @@
using namespace llvm;
STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
+STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+#ifndef NDEBUG
+STATISTIC(NumBBWithOutOfOrderLineInfo,
+ "Number of blocks with out of order line number info");
+STATISTIC(NumMBBWithOutOfOrderLineInfo,
+ "Number of machine blocks with out of order line number info");
+#endif
+
static cl::opt<bool>
EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
cl::desc("Enable verbose messages in the \"fast\" "
@@ -170,15 +180,18 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
-SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) :
+SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
+ CodeGenOpt::Level OL) :
MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
FuncInfo(new FunctionLoweringInfo(TLI)),
CurDAG(new SelectionDAG(tm)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
GFI(),
OptLevel(OL),
- DAGSize(0)
-{}
+ DAGSize(0) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+ initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
+ }
SelectionDAGISel::~SelectionDAGISel() {
delete SDB;
@@ -202,6 +215,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
static bool FunctionCallsSetJmp(const Function *F) {
const Module *M = F->getParent();
static const char *ReturnsTwiceFns[] = {
+ "_setjmp",
"setjmp",
"sigsetjmp",
"setjmp_syscall",
@@ -227,6 +241,44 @@ static bool FunctionCallsSetJmp(const Function *F) {
#undef NUM_RETURNS_TWICE_FNS
}
+/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that
+/// may trap on it. In this case we have to split the edge so that the path
+/// through the predecessor block that doesn't go to the phi block doesn't
+/// execute the possibly trapping instruction.
+///
+/// This is required for correctness, so it must be done at -O0.
+///
+static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
+ // Loop for blocks with phi nodes.
+ for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ PHINode *PN = dyn_cast<PHINode>(BB->begin());
+ if (PN == 0) continue;
+
+ ReprocessBlock:
+ // For each block with a PHI node, check to see if any of the input values
+ // are potentially trapping constant expressions. Constant expressions are
+ // the only potentially trapping value that can occur as the argument to a
+ // PHI.
+ for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I)
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
+ if (CE == 0 || !CE->canTrap()) continue;
+
+ // The only case we have to worry about is when the edge is critical.
+ // Since this block has a PHI Node, we assume it has multiple input
+ // edges: check to see if the pred has multiple successors.
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ if (Pred->getTerminator()->getNumSuccessors() == 1)
+ continue;
+
+ // Okay, we have to split this edge.
+ SplitCriticalEdge(Pred->getTerminator(),
+ GetSuccessorNumber(Pred, BB), SDISel, true);
+ goto ReprocessBlock;
+ }
+ }
+}
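
A concrete way to reach this path is a conditional whose untaken arm folds to a ConstantExpr division: whether the divisor is zero is a link-time question, so canTrap() answers conservatively and the edge must be split even at -O0. A hedged C++ reproducer (the exact IR depends on the front end):

    #include <cstdint>

    extern int g;

    int pick(bool p) {
      // The divisor typically folds to ptrtoint(&g), a ConstantExpr
      // whose value is unknown until link time, so the sdiv feeding
      // the phi may trap; splitting the critical edge keeps it off
      // paths that never reach the phi block.
      return p ? 42 : 1 / (int)(std::intptr_t)&g;
    }
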
+
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Do some sanity-checking on the command-line options.
assert((!EnableFastISelVerbose || EnableFastISel) &&
@@ -245,6 +297,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
+ SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
+
CurDAG->init(*MF);
FuncInfo->set(Fn, *MF);
SDB->init(GFI, *AA);
@@ -261,7 +315,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
if (!FuncInfo->ArgDbgValues.empty())
for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
E = RegInfo->livein_end(); LI != E; ++LI)
- if (LI->second)
+ if (LI->second)
LiveInMap.insert(std::make_pair(LI->first, LI->second));
// Insert DBG_VALUE instructions for function arguments to the entry block.
@@ -282,14 +336,37 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
if (LDI != LiveInMap.end()) {
MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
MachineBasicBlock::iterator InsertPos = Def;
- const MDNode *Variable =
+ const MDNode *Variable =
MI->getOperand(MI->getNumOperands()-1).getMetadata();
unsigned Offset = MI->getOperand(1).getImm();
// Def is never a terminator here, so it is ok to increment InsertPos.
- BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
+ BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
TII.get(TargetOpcode::DBG_VALUE))
.addReg(LDI->second, RegState::Debug)
.addImm(Offset).addMetadata(Variable);
+
+ // If this vreg is directly copied into an exported register then
+        // that COPY instruction also needs a DBG_VALUE, if it is the only
+ // user of LDI->second.
+ MachineInstr *CopyUseMI = NULL;
+ for (MachineRegisterInfo::use_iterator
+ UI = RegInfo->use_begin(LDI->second);
+ MachineInstr *UseMI = UI.skipInstruction();) {
+ if (UseMI->isDebugValue()) continue;
+ if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) {
+ CopyUseMI = UseMI; continue;
+ }
+ // Otherwise this is another use or second copy use.
+ CopyUseMI = NULL; break;
+ }
+ if (CopyUseMI) {
+ MachineInstr *NewMI =
+ BuildMI(*MF, CopyUseMI->getDebugLoc(),
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug)
+ .addImm(Offset).addMetadata(Variable);
+ EntryMBB->insertAfter(CopyUseMI, NewMI);
+ }
}
}
@@ -303,10 +380,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode());
- // Operand 1 of an inline asm instruction indicates whether the asm
- // needs stack or not.
- if ((II->isInlineAsm() && II->getOperand(1).getImm()) ||
- (TID.isCall() && !TID.isReturn())) {
+ if ((TID.isCall() && !TID.isReturn()) ||
+ II->isStackAligningInlineAsm()) {
MFI->setHasCalls(true);
goto done;
}
@@ -362,6 +437,7 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
// Final step, emit the lowered DAG as machine code.
CodeGenAndEmitDAG();
+ return;
}
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
@@ -406,9 +482,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
// Only install this information if it tells us something.
if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) {
- DestReg -= TargetRegisterInfo::FirstVirtualRegister;
- if (DestReg >= FuncInfo->LiveOutRegInfo.size())
- FuncInfo->LiveOutRegInfo.resize(DestReg+1);
+ FuncInfo->LiveOutRegInfo.grow(DestReg);
FunctionLoweringInfo::LiveOutInfo &LOI =
FuncInfo->LiveOutRegInfo[DestReg];
LOI.NumSignBits = NumSignBits;
@@ -541,13 +615,19 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Emit machine code to BB. This can change 'BB' to the last block being
// inserted into.
+ MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB;
{
NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
- FuncInfo->MBB = Scheduler->EmitSchedule();
+ LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule();
FuncInfo->InsertPt = Scheduler->InsertPos;
}
+ // If the block was split, make sure we update any references that are used to
+ // update PHI nodes later on.
+ if (FirstMBB != LastMBB)
+ SDB->UpdateSplitBlock(FirstMBB, LastMBB);
+
// Free the scheduler state.
{
NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName,
@@ -563,19 +643,19 @@ void SelectionDAGISel::DoInstructionSelection() {
DEBUG(errs() << "===== Instruction selection begins:\n");
PreprocessISelDAG();
-
+
// Select target instructions for the DAG.
{
// Number all nodes with a topological order and set DAGSize.
DAGSize = CurDAG->AssignTopologicalOrder();
-
+
// Create a dummy node (which is not added to allnodes), that adds
// a reference to the root node, preventing it from being deleted,
// and tracking any changes of the root.
HandleSDNode Dummy(CurDAG->getRoot());
ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode());
++ISelPosition;
-
+
// The AllNodes list is now topological-sorted. Visit the
// nodes by starting at the end of the list (the root of the
// graph) and preceding back toward the beginning (the entry
@@ -587,19 +667,19 @@ void SelectionDAGISel::DoInstructionSelection() {
// makes it theoretically possible to disable the DAGCombiner.
if (Node->use_empty())
continue;
-
+
SDNode *ResNode = Select(Node);
-
+
// FIXME: This is pretty gross. 'Select' should be changed to not return
// anything at all and this code should be nuked with a tactical strike.
-
+
// If node should not be replaced, continue with the next one.
if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
continue;
// Replace node.
if (ResNode)
ReplaceUses(Node, ResNode);
-
+
// If after the replacement this node is not used any more,
// remove this dead node.
if (Node->use_empty()) { // Don't delete EntryToken, etc.
@@ -607,9 +687,9 @@ void SelectionDAGISel::DoInstructionSelection() {
CurDAG->RemoveDeadNode(Node, &ISU);
}
}
-
+
CurDAG->setRoot(Dummy.getValue());
- }
+ }
DEBUG(errs() << "===== Instruction selection ends:\n");
@@ -661,6 +741,90 @@ void SelectionDAGISel::PrepareEHLandingPad() {
}
}
+
+
+
+bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
+ FastISel *FastIS) {
+ // Don't try to fold volatile loads. Target has to deal with alignment
+ // constraints.
+ if (LI->isVolatile()) return false;
+
+ // Figure out which vreg this is going into.
+ unsigned LoadReg = FastIS->getRegForValue(LI);
+ assert(LoadReg && "Load isn't already assigned a vreg? ");
+
+ // Check to see what the uses of this vreg are. If it has no uses, or more
+ // than one use (at the machine instr level) then we can't fold it.
+ MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg);
+ if (RI == RegInfo->reg_end())
+ return false;
+
+ // See if there is exactly one use of the vreg. If there are multiple uses,
+ // then the instruction got lowered to multiple machine instructions or the
+  // use of the loaded value ended up being multiple operands of the result; in
+ // either case, we can't fold this.
+ MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI;
+ if (PostRI != RegInfo->reg_end())
+ return false;
+
+ assert(RI.getOperand().isUse() &&
+ "The only use of the vreg must be a use, we haven't emitted the def!");
+
+ MachineInstr *User = &*RI;
+
+ // Set the insertion point properly. Folding the load can cause generation of
+  // other random instructions (like sign extends) for addressing modes; make
+ // sure they get inserted in a logical place before the new instruction.
+ FuncInfo->InsertPt = User;
+ FuncInfo->MBB = User->getParent();
+
+ // Ask the target to try folding the load.
+ return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI);
+}
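
On x86 this fold collapses a load/use pair into a single memory-operand instruction. Schematically (assembly shown for illustration):

    // Before folding:  movl (%rdi), %eax     After:  addl (%rdi), %esi
    //                  addl %eax, %esi
    int sum(const int *p, int x) {
      return x + *p; // single-use, non-volatile load: foldable
    }
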
+
+#ifndef NDEBUG
+/// CheckLineNumbers - Check if basic block instructions follow source order
+/// or not.
+static void CheckLineNumbers(const BasicBlock *BB) {
+ unsigned Line = 0;
+ unsigned Col = 0;
+ for (BasicBlock::const_iterator BI = BB->begin(),
+ BE = BB->end(); BI != BE; ++BI) {
+ const DebugLoc DL = BI->getDebugLoc();
+ if (DL.isUnknown()) continue;
+ unsigned L = DL.getLine();
+ unsigned C = DL.getCol();
+ if (L < Line || (L == Line && C < Col)) {
+ ++NumBBWithOutOfOrderLineInfo;
+ return;
+ }
+ Line = L;
+ Col = C;
+ }
+}
+
+/// CheckLineNumbers - Check if machine basic block instructions follow source
+/// order or not.
+static void CheckLineNumbers(const MachineBasicBlock *MBB) {
+ unsigned Line = 0;
+ unsigned Col = 0;
+ for (MachineBasicBlock::const_iterator MBI = MBB->begin(),
+ MBE = MBB->end(); MBI != MBE; ++MBI) {
+ const DebugLoc DL = MBI->getDebugLoc();
+ if (DL.isUnknown()) continue;
+ unsigned L = DL.getLine();
+ unsigned C = DL.getCol();
+ if (L < Line || (L == Line && C < Col)) {
+ ++NumMBBWithOutOfOrderLineInfo;
+ return;
+ }
+ Line = L;
+ Col = C;
+ }
+}
+#endif
+
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = 0;
@@ -670,6 +834,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Iterate over all basic blocks in the function.
for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
const BasicBlock *LLVMBB = &*I;
+#ifndef NDEBUG
+ CheckLineNumbers(LLVMBB);
+#endif
FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
@@ -682,10 +849,19 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Setup an EH landing-pad block.
if (FuncInfo->MBB->isLandingPad())
PrepareEHLandingPad();
-
+
// Lower any arguments needed in this block if this is the entry block.
- if (LLVMBB == &Fn.getEntryBlock())
+ if (LLVMBB == &Fn.getEntryBlock()) {
+ for (BasicBlock::const_iterator DBI = LLVMBB->begin(), DBE = LLVMBB->end();
+ DBI != DBE; ++DBI) {
+ if (const DbgInfoIntrinsic *DI = dyn_cast<DbgInfoIntrinsic>(DBI)) {
+ const DebugLoc DL = DI->getDebugLoc();
+ SDB->setCurDebugLoc(DL);
+ break;
+ }
+ }
LowerArguments(LLVMBB);
+ }
// Before doing SelectionDAG ISel, see if FastISel has been requested.
if (FastIS) {
@@ -723,8 +899,19 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->recomputeInsertPt();
// Try to select the instruction with FastISel.
- if (FastIS->SelectInstruction(Inst))
+ if (FastIS->SelectInstruction(Inst)) {
+ // If fast isel succeeded, check to see if there is a single-use
+ // non-volatile load right before the selected instruction, and see if
+ // the load is used by the instruction. If so, try to fold it.
+ const Instruction *BeforeInst = 0;
+ if (Inst != Begin)
+ BeforeInst = llvm::prior(llvm::prior(BI));
+ if (BeforeInst && isa<LoadInst>(BeforeInst) &&
+ BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst &&
+ TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS))
+ --BI; // If we succeeded, don't re-select the load.
continue;
+ }
// Then handle certain instructions as single-LLVM-Instruction blocks.
if (isa<CallInst>(Inst)) {
@@ -771,6 +958,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->recomputeInsertPt();
}
+ if (Begin != BI)
+ ++NumDAGBlocks;
+ else
+ ++NumFastIselBlocks;
+
// Run SelectionDAG instruction selection on the remainder of the block
// not handled by FastISel. If FastISel is not run, this is the entire
// block.
@@ -782,6 +974,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
delete FastIS;
+#ifndef NDEBUG
+ for (MachineFunction::const_iterator MBI = MF->begin(), MBE = MF->end();
+ MBI != MBE; ++MBI)
+ CheckLineNumbers(MBI);
+#endif
}
void
@@ -831,12 +1028,14 @@ SelectionDAGISel::FinishBasicBlock() {
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
if (j+1 != ej)
- SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB,
+ SDB->visitBitTestCase(SDB->BitTestCases[i],
+ SDB->BitTestCases[i].Cases[j+1].ThisBB,
SDB->BitTestCases[i].Reg,
SDB->BitTestCases[i].Cases[j],
FuncInfo->MBB);
else
- SDB->visitBitTestCase(SDB->BitTestCases[i].Default,
+ SDB->visitBitTestCase(SDB->BitTestCases[i],
+ SDB->BitTestCases[i].Default,
SDB->BitTestCases[i].Reg,
SDB->BitTestCases[i].Cases[j],
FuncInfo->MBB);
@@ -951,7 +1150,7 @@ SelectionDAGISel::FinishBasicBlock() {
// additional DAGs necessary.
for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
// Set the current basic block to the mbb we wish to insert the code into
- MachineBasicBlock *ThisBB = FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
+ FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Determine the unique successors.
@@ -960,13 +1159,15 @@ SelectionDAGISel::FinishBasicBlock() {
if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB)
Succs.push_back(SDB->SwitchCases[i].FalseBB);
- // Emit the code. Note that this could result in ThisBB being split, so
- // we need to check for updates.
+ // Emit the code. Note that this could result in FuncInfo->MBB being split.
SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
- ThisBB = FuncInfo->MBB;
+
+ // Remember the last block, now that any splitting is done, for use in
+ // populating PHI nodes in successors.
+ MachineBasicBlock *ThisBB = FuncInfo->MBB;
// Handle any PHI nodes in successors of this chunk, as if we were coming
// from the original BB before switch expansion. Note that PHI nodes can
@@ -1016,10 +1217,6 @@ ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
return Ctor(this, OptLevel);
}
-ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() {
- return new ScheduleHazardRecognizer();
-}
-
//===----------------------------------------------------------------------===//
// Helper functions used by the generated instruction selector.
//===----------------------------------------------------------------------===//
@@ -1099,11 +1296,11 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1
Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc
- Ops.push_back(InOps[InlineAsm::Op_IsAlignStack]); // 3
+ Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack)
unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
- if (InOps[e-1].getValueType() == MVT::Flag)
- --e; // Don't process a flag operand if it is here.
+ if (InOps[e-1].getValueType() == MVT::Glue)
+ --e; // Don't process a glue operand if it is here.
while (i != e) {
unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
@@ -1130,15 +1327,15 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
}
}
- // Add the flag input back if present.
+ // Add the glue input back if present.
if (e != InOps.size())
Ops.push_back(InOps.back());
}
-/// findFlagUse - Return use of EVT::Flag value produced by the specified
+/// findGlueUse - Return use of MVT::Glue value produced by the specified
/// SDNode.
///
-static SDNode *findFlagUse(SDNode *N) {
+static SDNode *findGlueUse(SDNode *N) {
unsigned FlagResNo = N->getNumValues()-1;
for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
SDUse &Use = I.getUse();
@@ -1160,11 +1357,11 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
// never find it.
//
// The Use may be -1 (unassigned) if it is a newly allocated node. This can
- // happen because we scan down to newly selected nodes in the case of flag
+ // happen because we scan down to newly selected nodes in the case of glue
// uses.
if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1))
return false;
-
+
// Don't revisit nodes if we already scanned it and didn't fail, we know we
// won't fail if we scan it again.
if (!Visited.insert(Use))
@@ -1174,7 +1371,7 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
// Ignore chain uses, they are validated by HandleMergeInputChains.
if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains)
continue;
-
+
SDNode *N = Use->getOperand(i).getNode();
if (N == Def) {
if (Use == ImmedUse || Use == Root)
@@ -1221,8 +1418,8 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
//
// * indicates nodes to be folded together.
//
- // If Root produces a flag, then it gets (even more) interesting. Since it
- // will be "glued" together with its flag use in the scheduler, we need to
+ // If Root produces glue, then it gets (even more) interesting. Since it
+ // will be "glued" together with its glue use in the scheduler, we need to
// check if it might reach N.
//
// [N*] //
@@ -1240,30 +1437,30 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
// ^ / //
// f / //
// | / //
- // [FU] //
+ // [GU] //
//
- // If FU (flag use) indirectly reaches N (the load), and Root folds N
- // (call it Fold), then X is a predecessor of FU and a successor of
- // Fold. But since Fold and FU are flagged together, this will create
+ // If GU (glue use) indirectly reaches N (the load), and Root folds N
+ // (call it Fold), then X is a predecessor of GU and a successor of
+ // Fold. But since Fold and GU are glued together, this will create
// a cycle in the scheduling graph.
- // If the node has flags, walk down the graph to the "lowest" node in the
- // flagged set.
+ // If the node has glue, walk down the graph to the "lowest" node in the
+  // glued set.
EVT VT = Root->getValueType(Root->getNumValues()-1);
- while (VT == MVT::Flag) {
- SDNode *FU = findFlagUse(Root);
- if (FU == NULL)
+ while (VT == MVT::Glue) {
+ SDNode *GU = findGlueUse(Root);
+ if (GU == NULL)
break;
- Root = FU;
+ Root = GU;
VT = Root->getValueType(Root->getNumValues()-1);
-
- // If our query node has a flag result with a use, we've walked up it. If
+
+ // If our query node has a glue result with a use, we've walked up it. If
// the user (which has already been selected) has a chain or indirectly uses
// the chain, our WalkChainUsers predicate will not consider it. Because of
// this, we cannot ignore chains in this predicate.
IgnoreChains = false;
}
-
+
SmallPtrSet<SDNode*, 16> Visited;
return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
@@ -1272,10 +1469,10 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
std::vector<SDValue> Ops(N->op_begin(), N->op_end());
SelectInlineAsmMemoryOperands(Ops);
-
+
std::vector<EVT> VTs;
VTs.push_back(MVT::Other);
- VTs.push_back(MVT::Flag);
+ VTs.push_back(MVT::Glue);
SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
VTs, &Ops[0], Ops.size());
New->setNodeId(-1);
@@ -1287,11 +1484,11 @@ SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
}
/// GetVBR - decode a vbr encoding whose top bit is set.
-ALWAYS_INLINE static uint64_t
+LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
assert(Val >= 128 && "Not a VBR");
Val &= 127; // Remove first vbr bit.
-
+
unsigned Shift = 7;
uint64_t NextBits;
do {
@@ -1299,25 +1496,25 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
Val |= (NextBits&127) << Shift;
Shift += 7;
} while (NextBits & 128);
-
+
return Val;
}
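
The matcher-table encoding decoded here is a little-endian base-128 varint: seven payload bits per byte, with bit 7 set on every byte except the last. A standalone round-trip sketch of the same scheme (GetVBR itself is handed the first byte pre-consumed):

    #include <cstdint>
    #include <vector>

    static void EncodeVBR(uint64_t v, std::vector<uint8_t> &out) {
      while (v >= 128) {
        out.push_back(uint8_t((v & 127) | 128)); // more bytes follow
        v >>= 7;
      }
      out.push_back(uint8_t(v));                 // final byte, bit 7 clear
    }

    static uint64_t DecodeVBR(const uint8_t *table, unsigned &idx) {
      uint64_t val = 0;
      unsigned shift = 0;
      uint8_t byte;
      do {
        byte = table[idx++];
        val |= uint64_t(byte & 127) << shift;
        shift += 7;
      } while (byte & 128);
      return val;
    }
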
-/// UpdateChainsAndFlags - When a match is complete, this method updates uses of
-/// interior flag and chain results to use the new flag and chain results.
+/// UpdateChainsAndGlue - When a match is complete, this method updates uses of
+/// interior glue and chain results to use the new glue and chain results.
void SelectionDAGISel::
-UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain,
- const SmallVectorImpl<SDNode*> &ChainNodesMatched,
- SDValue InputFlag,
- const SmallVectorImpl<SDNode*> &FlagResultNodesMatched,
- bool isMorphNodeTo) {
+UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
+ const SmallVectorImpl<SDNode*> &ChainNodesMatched,
+ SDValue InputGlue,
+ const SmallVectorImpl<SDNode*> &GlueResultNodesMatched,
+ bool isMorphNodeTo) {
SmallVector<SDNode*, 4> NowDeadNodes;
-
+
ISelUpdater ISU(ISelPosition);
// Now that all the normal results are replaced, we replace the chain and
- // flag results if present.
+ // glue results if present.
if (!ChainNodesMatched.empty()) {
assert(InputChain.getNode() != 0 &&
"Matched input chains but didn't produce a chain");
@@ -1325,55 +1522,55 @@ UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain,
// Replace all the chain results with the final chain we ended up with.
for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
SDNode *ChainNode = ChainNodesMatched[i];
-
+
// If this node was already deleted, don't look at it.
if (ChainNode->getOpcode() == ISD::DELETED_NODE)
continue;
-
+
// Don't replace the results of the root node if we're doing a
// MorphNodeTo.
if (ChainNode == NodeToMatch && isMorphNodeTo)
continue;
-
+
SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1);
- if (ChainVal.getValueType() == MVT::Flag)
+ if (ChainVal.getValueType() == MVT::Glue)
ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU);
-
+
// If the node became dead and we haven't already seen it, delete it.
if (ChainNode->use_empty() &&
!std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode))
NowDeadNodes.push_back(ChainNode);
}
}
-
- // If the result produces a flag, update any flag results in the matched
- // pattern with the flag result.
- if (InputFlag.getNode() != 0) {
+
+ // If the result produces glue, update any glue results in the matched
+ // pattern with the glue result.
+ if (InputGlue.getNode() != 0) {
// Handle any interior nodes explicitly marked.
- for (unsigned i = 0, e = FlagResultNodesMatched.size(); i != e; ++i) {
- SDNode *FRN = FlagResultNodesMatched[i];
-
+ for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) {
+ SDNode *FRN = GlueResultNodesMatched[i];
+
// If this node was already deleted, don't look at it.
if (FRN->getOpcode() == ISD::DELETED_NODE)
continue;
-
- assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Flag &&
- "Doesn't have a flag result");
+
+ assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue &&
+ "Doesn't have a glue result");
CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1),
- InputFlag, &ISU);
-
+ InputGlue, &ISU);
+
// If the node became dead and we haven't already seen it, delete it.
if (FRN->use_empty() &&
!std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN))
NowDeadNodes.push_back(FRN);
}
}
-
+
if (!NowDeadNodes.empty())
CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU);
-
+
DEBUG(errs() << "ISEL: Match complete!\n");
}
@@ -1392,17 +1589,17 @@ enum ChainResult {
///
/// The walk we do here is guaranteed to be small because we quickly get down to
/// already selected nodes "below" us.
-static ChainResult
+static ChainResult
WalkChainUsers(SDNode *ChainedNode,
SmallVectorImpl<SDNode*> &ChainedNodesInPattern,
SmallVectorImpl<SDNode*> &InteriorChainedNodes) {
ChainResult Result = CR_Simple;
-
+
for (SDNode::use_iterator UI = ChainedNode->use_begin(),
E = ChainedNode->use_end(); UI != E; ++UI) {
// Make sure the use is of the chain, not some other value we produce.
if (UI.getUse().getValueType() != MVT::Other) continue;
-
+
SDNode *User = *UI;
// If we see an already-selected machine node, then we've gone beyond the
@@ -1411,7 +1608,7 @@ WalkChainUsers(SDNode *ChainedNode,
if (User->isMachineOpcode() ||
User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
continue;
-
+
if (User->getOpcode() == ISD::CopyToReg ||
User->getOpcode() == ISD::CopyFromReg ||
User->getOpcode() == ISD::INLINEASM ||
@@ -1437,7 +1634,7 @@ WalkChainUsers(SDNode *ChainedNode,
if (!std::count(ChainedNodesInPattern.begin(),
ChainedNodesInPattern.end(), User))
return CR_InducesCycle;
-
+
// Otherwise we found a node that is part of our pattern. For example in:
// x = load ptr
// y = x+4
@@ -1449,7 +1646,7 @@ WalkChainUsers(SDNode *ChainedNode,
InteriorChainedNodes.push_back(User);
continue;
}
-
+
// If we found a TokenFactor, there are two cases to consider: first if the
// TokenFactor is just hanging "below" the pattern we're matching (i.e. no
// uses of the TF are in our pattern) we just want to ignore it. Second,
@@ -1486,7 +1683,7 @@ WalkChainUsers(SDNode *ChainedNode,
case CR_LeadsToInteriorNode:
break; // Otherwise, keep processing.
}
-
+
// Okay, we know we're in the interesting interior case. The TokenFactor
// is now going to be considered part of the pattern so that we rewrite its
// uses (it may have uses that are not part of the pattern) with the
@@ -1497,7 +1694,7 @@ WalkChainUsers(SDNode *ChainedNode,
InteriorChainedNodes.push_back(User);
continue;
}
-
+
return Result;
}
@@ -1519,7 +1716,7 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
InteriorChainedNodes) == CR_InducesCycle)
return SDValue(); // Would induce a cycle.
}
-
+
// Okay, we have walked all the matched nodes and collected TokenFactor nodes
// that we are interested in. Form our input TokenFactor node.
SmallVector<SDValue, 3> InputChains;
@@ -1530,14 +1727,14 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
if (N->getOpcode() != ISD::TokenFactor) {
if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
continue;
-
+
// Otherwise, add the input chain.
SDValue InChain = ChainNodesMatched[i]->getOperand(0);
assert(InChain.getValueType() == MVT::Other && "Not a chain");
InputChains.push_back(InChain);
continue;
}
-
+
// If we have a token factor, we want to add all inputs of the token factor
// that are not part of the pattern we're matching.
for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
@@ -1546,13 +1743,13 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
InputChains.push_back(N->getOperand(op));
}
}
-
+
SDValue Res;
if (InputChains.size() == 1)
return InputChains[0];
return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(),
MVT::Other, &InputChains[0], InputChains.size());
-}
+}
/// MorphNode - Handle morphing a node in place for the selector.
SDNode *SelectionDAGISel::
@@ -1560,15 +1757,15 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) {
// It is possible we're using MorphNodeTo to replace a node with no
// normal results with one that has a normal result (or we could be
- // adding a chain) and the input could have flags and chains as well.
+ // adding a chain) and the input could have glue and chains as well.
// In this case we need to shift the operands down.
// FIXME: This is a horrible hack and broken in obscure cases, no worse
// than the old isel though.
- int OldFlagResultNo = -1, OldChainResultNo = -1;
+ int OldGlueResultNo = -1, OldChainResultNo = -1;
unsigned NTMNumResults = Node->getNumValues();
- if (Node->getValueType(NTMNumResults-1) == MVT::Flag) {
- OldFlagResultNo = NTMNumResults-1;
+ if (Node->getValueType(NTMNumResults-1) == MVT::Glue) {
+ OldGlueResultNo = NTMNumResults-1;
if (NTMNumResults != 1 &&
Node->getValueType(NTMNumResults-2) == MVT::Other)
OldChainResultNo = NTMNumResults-2;
@@ -1589,54 +1786,55 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
}
unsigned ResNumResults = Res->getNumValues();
- // Move the flag if needed.
- if ((EmitNodeInfo & OPFL_FlagOutput) && OldFlagResultNo != -1 &&
- (unsigned)OldFlagResultNo != ResNumResults-1)
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldFlagResultNo),
+ // Move the glue if needed.
+ if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
+ (unsigned)OldGlueResultNo != ResNumResults-1)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo),
SDValue(Res, ResNumResults-1));
- if ((EmitNodeInfo & OPFL_FlagOutput) != 0)
+ if ((EmitNodeInfo & OPFL_GlueOutput) != 0)
--ResNumResults;
// Move the chain reference if needed.
if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
(unsigned)OldChainResultNo != ResNumResults-1)
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
SDValue(Res, ResNumResults-1));
// Otherwise, no replacement happened because the node already exists. Replace
// Uses of the old node with the new one.
if (Res != Node)
CurDAG->ReplaceAllUsesWith(Node, Res);
-
+
return Res;
}
/// CheckSame - Implements OP_CheckSame.
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N, const SmallVectorImpl<SDValue> &RecordedNodes) {
+ SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
// Accept if it is exactly the same as a previously recorded node.
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- return N == RecordedNodes[RecNo];
+ return N == RecordedNodes[RecNo].first;
}
-
+
/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SelectionDAGISel &SDISel) {
return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
}
/// CheckNodePredicate - Implements OP_CheckNodePredicate.
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SelectionDAGISel &SDISel, SDNode *N) {
return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDNode *N) {
uint16_t Opc = MatcherTable[MatcherIndex++];
@@ -1644,17 +1842,17 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return N->getOpcode() == Opc;
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering &TLI) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
if (N.getValueType() == VT) return true;
-
+
// Handle the case when VT is iPTR.
return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy();
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering &TLI,
unsigned ChildNo) {
@@ -1664,57 +1862,57 @@ CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
return cast<CondCodeSDNode>(N)->get() ==
(ISD::CondCode)MatcherTable[MatcherIndex++];
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering &TLI) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
if (cast<VTSDNode>(N)->getVT() == VT)
return true;
-
+
// Handle the case when VT is iPTR.
return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy();
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
-
+
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
return C != 0 && C->getSExtValue() == Val;
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
-
+
if (N->getOpcode() != ISD::AND) return false;
-
+
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val);
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
-
+
if (N->getOpcode() != ISD::OR) return false;
-
+
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val);
}
@@ -1724,11 +1922,11 @@ CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
/// fail, set Result=true and return anything. If the current predicate is
/// known to pass, set Result=false and return the MatcherIndex to continue
/// with. If the current predicate is unknown, set Result=false and return the
-/// MatcherIndex to continue with.
+/// MatcherIndex to continue with.
static unsigned IsPredicateKnownToFail(const unsigned char *Table,
unsigned Index, SDValue N,
bool &Result, SelectionDAGISel &SDISel,
- SmallVectorImpl<SDValue> &RecordedNodes){
+ SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
switch (Table[Index++]) {
default:
Result = false;
@@ -1782,21 +1980,21 @@ namespace {
struct MatchScope {
/// FailIndex - If this match fails, this is the index to continue with.
unsigned FailIndex;
-
+
/// NodeStack - The node stack when the scope was formed.
SmallVector<SDValue, 4> NodeStack;
-
+
/// NumRecordedNodes - The number of recorded nodes when the scope was formed.
unsigned NumRecordedNodes;
-
+
/// NumMatchedMemRefs - The number of matched memref entries.
unsigned NumMatchedMemRefs;
-
- /// InputChain/InputFlag - The current chain/flag
- SDValue InputChain, InputFlag;
+
+ /// InputChain/InputGlue - The current chain/glue
+ SDValue InputChain, InputGlue;
/// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty.
- bool HasChainNodesMatched, HasFlagResultNodesMatched;
+ bool HasChainNodesMatched, HasGlueResultNodesMatched;
};
}
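[Editor's note] MatchScope is the backtracking machinery in miniature: everything the interpreter mutates while matching is snapshotted when an OPC_Scope is entered and restored when a child pattern fails. A condensed view of that lifecycle, paraphrasing the push and restore code that appears further down in SelectCodeCommon (nothing here is new code):

// On entering a scope: snapshot the mutable matcher state.
//   NewEntry.FailIndex        = MatcherIndex + NumToSkip;
//   NewEntry.NodeStack        = NodeStack;               // copied
//   NewEntry.NumRecordedNodes = RecordedNodes.size();
//   NewEntry.InputChain       = InputChain;  (same for InputGlue)
// On a failed match: roll back and jump to the next alternative.
//   RecordedNodes.resize(LastScope.NumRecordedNodes);
//   NodeStack = LastScope.NodeStack;                     // restored
//   MatcherIndex = LastScope.FailIndex;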
@@ -1838,7 +2036,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch);
case ISD::UNDEF: return Select_UNDEF(NodeToMatch);
}
-
+
assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
// Set up the node stack with NodeToMatch as the only node on the stack.
@@ -1849,37 +2047,38 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// MatchScopes - Scopes used when matching, if a match failure happens, this
// indicates where to continue checking.
SmallVector<MatchScope, 8> MatchScopes;
-
+
// RecordedNodes - This is the set of nodes that have been recorded by the
- // state machine.
- SmallVector<SDValue, 8> RecordedNodes;
-
+ // state machine. The second value is the parent of the node, or null if the
+ // root is recorded.
+ SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes;
+
// MatchedMemRefs - This is the set of MemRef's we've seen in the input
// pattern.
SmallVector<MachineMemOperand*, 2> MatchedMemRefs;
-
- // These are the current input chain and flag for use when generating nodes.
+
+ // These are the current input chain and glue for use when generating nodes.
// Various Emit operations change these. For example, emitting a copytoreg
// uses and updates these.
- SDValue InputChain, InputFlag;
-
+ SDValue InputChain, InputGlue;
+
// ChainNodesMatched - If a pattern matches nodes that have input/output
// chains, the OPC_EmitMergeInputChains operation is emitted which indicates
// which ones they are. The result is captured into this list so that we can
// update the chain results when the pattern is complete.
SmallVector<SDNode*, 3> ChainNodesMatched;
- SmallVector<SDNode*, 3> FlagResultNodesMatched;
-
+ SmallVector<SDNode*, 3> GlueResultNodesMatched;
+
DEBUG(errs() << "ISEL: Starting pattern match on root node: ";
NodeToMatch->dump(CurDAG);
errs() << '\n');
-
+
// Determine where to start the interpreter. Normally we start at opcode #0,
// but if the state machine starts with an OPC_SwitchOpcode, then we
// accelerate the first lookup (which is guaranteed to be hot) with the
// OpcodeOffset table.
unsigned MatcherIndex = 0;
-
+
if (!OpcodeOffset.empty()) {
// Already computed the OpcodeOffset table, just index into it.
if (N.getOpcode() < OpcodeOffset.size())
@@ -1911,7 +2110,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (N.getOpcode() < OpcodeOffset.size())
MatcherIndex = OpcodeOffset[N.getOpcode()];
}
-
+
while (1) {
assert(MatcherIndex < TableSize && "Invalid index");
#ifndef NDEBUG
@@ -1926,7 +2125,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// determine immediately that the first check (or first several) will
// immediately fail, don't even bother pushing a scope for them.
unsigned FailIndex;
-
+
while (1) {
unsigned NumToSkip = MatcherTable[MatcherIndex++];
if (NumToSkip & 128)
@@ -1936,12 +2135,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
FailIndex = 0;
break;
}
-
+
FailIndex = MatcherIndex+NumToSkip;
-
+
unsigned MatcherIndexOfPredicate = MatcherIndex;
(void)MatcherIndexOfPredicate; // silence warning.
-
+
// If we can't evaluate this predicate without pushing a scope (e.g. if
// it is a 'MoveParent') or if the predicate succeeds on this node, we
// push the scope and evaluate the full predicate chain.
@@ -1950,20 +2149,20 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
Result, *this, RecordedNodes);
if (!Result)
break;
-
+
DEBUG(errs() << " Skipped scope entry (due to false predicate) at "
<< "index " << MatcherIndexOfPredicate
<< ", continuing at " << FailIndex << "\n");
++NumDAGIselRetries;
-
+
// Otherwise, we know that this case of the Scope is guaranteed to fail,
// move to the next case.
MatcherIndex = FailIndex;
}
-
+
// If the whole scope failed to match, bail.
if (FailIndex == 0) break;
-
+
// Push a MatchScope which indicates where to go if the first child fails
// to match.
MatchScope NewEntry;
@@ -1972,17 +2171,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
NewEntry.NumRecordedNodes = RecordedNodes.size();
NewEntry.NumMatchedMemRefs = MatchedMemRefs.size();
NewEntry.InputChain = InputChain;
- NewEntry.InputFlag = InputFlag;
+ NewEntry.InputGlue = InputGlue;
NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty();
- NewEntry.HasFlagResultNodesMatched = !FlagResultNodesMatched.empty();
+ NewEntry.HasGlueResultNodesMatched = !GlueResultNodesMatched.empty();
MatchScopes.push_back(NewEntry);
continue;
}
- case OPC_RecordNode:
+ case OPC_RecordNode: {
// Remember this node, it may end up being an operand in the pattern.
- RecordedNodes.push_back(N);
+ SDNode *Parent = 0;
+ if (NodeStack.size() > 1)
+ Parent = NodeStack[NodeStack.size()-2].getNode();
+ RecordedNodes.push_back(std::make_pair(N, Parent));
continue;
-
+ }
+
case OPC_RecordChild0: case OPC_RecordChild1:
case OPC_RecordChild2: case OPC_RecordChild3:
case OPC_RecordChild4: case OPC_RecordChild5:
@@ -1991,20 +2194,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (ChildNo >= N.getNumOperands())
break; // Match fails if out of range child #.
- RecordedNodes.push_back(N->getOperand(ChildNo));
+ RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo),
+ N.getNode()));
continue;
}
case OPC_RecordMemRef:
MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand());
continue;
-
- case OPC_CaptureFlagInput:
- // If the current node has an input flag, capture it in InputFlag.
+
+ case OPC_CaptureGlueInput:
+ // If the current node has an input glue, capture it in InputGlue.
if (N->getNumOperands() != 0 &&
- N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag)
- InputFlag = N->getOperand(N->getNumOperands()-1);
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue)
+ InputGlue = N->getOperand(N->getNumOperands()-1);
continue;
-
+
case OPC_MoveChild: {
unsigned ChildNo = MatcherTable[MatcherIndex++];
if (ChildNo >= N.getNumOperands())
@@ -2013,14 +2217,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
NodeStack.push_back(N);
continue;
}
-
+
case OPC_MoveParent:
// Pop the current node off the NodeStack.
NodeStack.pop_back();
assert(!NodeStack.empty() && "Node stack imbalance!");
- N = NodeStack.back();
+ N = NodeStack.back();
continue;
-
+
case OPC_CheckSame:
if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
continue;
@@ -2036,7 +2240,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
unsigned CPNum = MatcherTable[MatcherIndex++];
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
- if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo], CPNum,
+ if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
+ RecordedNodes[RecNo].first, CPNum,
RecordedNodes))
break;
continue;
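[Editor's note] The extra RecordedNodes[RecNo].second argument is the point of the new pair representation: a ComplexPattern callback now learns which node consumes the recorded operand, not just the operand itself. A hypothetical use, sketched by the editor (SelectAddrSketch and its policy are illustrative, not part of this patch):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;
// Hypothetical complex-pattern helper: refuse to fold an address
// computation into a volatile access, using the parent made available here.
static bool SelectAddrSketch(SDNode *Parent, SDValue Addr,
                             SDValue &Base, SDValue &Offset) {
  if (MemSDNode *M = dyn_cast_or_null<MemSDNode>(Parent))
    if (M->isVolatile())
      return false;            // Parent may be null when the root was recorded
  Base = Addr;                 // trivial fallback: whole address as the base
  Offset = SDValue();
  return true;
}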
@@ -2044,11 +2249,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case OPC_CheckOpcode:
if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;
continue;
-
+
case OPC_CheckType:
if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break;
continue;
-
+
case OPC_SwitchOpcode: {
unsigned CurNodeOpcode = N.getOpcode();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
@@ -2066,22 +2271,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// If the opcode matches, then we will execute this case.
if (CurNodeOpcode == Opc)
break;
-
+
// Otherwise, skip over this case.
MatcherIndex += CaseSize;
}
-
+
// If no cases matched, bail out.
if (CaseSize == 0) break;
-
+
// Otherwise, execute the case we found.
DEBUG(errs() << " OpcodeSwitch from " << SwitchStart
<< " to " << MatcherIndex << "\n");
continue;
}
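[Editor's note] For readers decoding the loop above: as this editor understands the matcher-table format of this era, an OPC_SwitchOpcode is laid out as a sequence of (size, opcode, body) cases terminated by a zero size, which is exactly what the CaseSize handling walks over.

// Editor's sketch of the assumed table layout, not emitted by this patch:
//   OPC_SwitchOpcode,
//     CaseSize0 (VBR), CaseOpcode0, <CaseSize0 bytes of matcher>,
//     CaseSize1 (VBR), CaseOpcode1, <CaseSize1 bytes of matcher>,
//     ...
//     0             // a CaseSize of zero terminates the switch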
-
+
case OPC_SwitchType: {
- MVT::SimpleValueType CurNodeVT = N.getValueType().getSimpleVT().SimpleTy;
+ MVT CurNodeVT = N.getValueType().getSimpleVT();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
unsigned CaseSize;
while (1) {
@@ -2090,23 +2295,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (CaseSize & 128)
CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
if (CaseSize == 0) break;
-
- MVT::SimpleValueType CaseVT =
- (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+
+ MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
if (CaseVT == MVT::iPTR)
- CaseVT = TLI.getPointerTy().SimpleTy;
-
+ CaseVT = TLI.getPointerTy();
+
// If the VT matches, then we will execute this case.
if (CurNodeVT == CaseVT)
break;
-
+
// Otherwise, skip over this case.
MatcherIndex += CaseSize;
}
-
+
// If no cases matched, bail out.
if (CaseSize == 0) break;
-
+
// Otherwise, execute the case we found.
DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
<< "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
@@ -2135,7 +2339,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case OPC_CheckOrImm:
if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
continue;
-
+
case OPC_CheckFoldableChainNode: {
assert(NodeStack.size() != 1 && "No parent node");
// Verify that all intermediate nodes between the root and this one have
@@ -2156,7 +2360,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
NodeToMatch, OptLevel,
true/*We validate our own chains*/))
break;
-
+
continue;
}
case OPC_EmitInteger: {
@@ -2165,22 +2369,24 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
- RecordedNodes.push_back(CurDAG->getTargetConstant(Val, VT));
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getTargetConstant(Val, VT), (SDNode*)0));
continue;
}
case OPC_EmitRegister: {
MVT::SimpleValueType VT =
(MVT::SimpleValueType)MatcherTable[MatcherIndex++];
unsigned RegNo = MatcherTable[MatcherIndex++];
- RecordedNodes.push_back(CurDAG->getRegister(RegNo, VT));
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), (SDNode*)0));
continue;
}
-
+
case OPC_EmitConvertToTarget: {
// Convert from IMM/FPIMM to target version.
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- SDValue Imm = RecordedNodes[RecNo];
+ SDValue Imm = RecordedNodes[RecNo].first;
if (Imm->getOpcode() == ISD::Constant) {
int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue();
@@ -2189,11 +2395,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType());
}
-
- RecordedNodes.push_back(Imm);
+
+ RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));
continue;
}
-
+
case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0
case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1
// These are space-optimized forms of OPC_EmitMergeInputChains.
@@ -2201,28 +2407,28 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
"EmitMergeInputChains should be the first chain producing node");
assert(ChainNodesMatched.empty() &&
"Should only have one EmitMergeInputChains per match");
-
+
// Read all of the chained nodes.
unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1;
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode());
-
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
// FIXME: What if other value results of the node have uses not matched
// by this pattern?
if (ChainNodesMatched.back() != NodeToMatch &&
- !RecordedNodes[RecNo].hasOneUse()) {
+ !RecordedNodes[RecNo].first.hasOneUse()) {
ChainNodesMatched.clear();
break;
}
-
+
// Merge the input chains if they are not intra-pattern references.
InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
-
+
if (InputChain.getNode() == 0)
break; // Failed to merge.
continue;
}
-
+
case OPC_EmitMergeInputChains: {
assert(InputChain.getNode() == 0 &&
"EmitMergeInputChains should be the first chain producing node");
@@ -2242,54 +2448,55 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
for (unsigned i = 0; i != NumChains; ++i) {
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode());
-
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
// FIXME: What if other value results of the node have uses not matched
// by this pattern?
if (ChainNodesMatched.back() != NodeToMatch &&
- !RecordedNodes[RecNo].hasOneUse()) {
+ !RecordedNodes[RecNo].first.hasOneUse()) {
ChainNodesMatched.clear();
break;
}
}
-
+
// If the inner loop broke out, the match fails.
if (ChainNodesMatched.empty())
break;
// Merge the input chains if they are not intra-pattern references.
InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
-
+
if (InputChain.getNode() == 0)
break; // Failed to merge.
continue;
}
-
+
case OPC_EmitCopyToReg: {
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
unsigned DestPhysReg = MatcherTable[MatcherIndex++];
-
+
if (InputChain.getNode() == 0)
InputChain = CurDAG->getEntryNode();
-
+
InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(),
- DestPhysReg, RecordedNodes[RecNo],
- InputFlag);
-
- InputFlag = InputChain.getValue(1);
+ DestPhysReg, RecordedNodes[RecNo].first,
+ InputGlue);
+
+ InputGlue = InputChain.getValue(1);
continue;
}
-
+
case OPC_EmitNodeXForm: {
unsigned XFormNo = MatcherTable[MatcherIndex++];
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- RecordedNodes.push_back(RunSDNodeXForm(RecordedNodes[RecNo], XFormNo));
+ SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo);
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0));
continue;
}
-
+
case OPC_EmitNode:
case OPC_MorphNodeTo: {
uint16_t TargetOpc = MatcherTable[MatcherIndex++];
@@ -2304,12 +2511,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy;
VTs.push_back(VT);
}
-
+
if (EmitNodeInfo & OPFL_Chain)
VTs.push_back(MVT::Other);
- if (EmitNodeInfo & OPFL_FlagOutput)
- VTs.push_back(MVT::Flag);
-
+ if (EmitNodeInfo & OPFL_GlueOutput)
+ VTs.push_back(MVT::Glue);
+
// This is hot code, so optimize the two most common cases of 1 and 2
// results.
SDVTList VTList;
@@ -2327,11 +2534,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
unsigned RecNo = MatcherTable[MatcherIndex++];
if (RecNo & 128)
RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
-
+
assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
- Ops.push_back(RecordedNodes[RecNo]);
+ Ops.push_back(RecordedNodes[RecNo].first);
}
-
+
// If there are variadic operands to add, handle them now.
if (EmitNodeInfo & OPFL_VariadicInfo) {
// Determine the start index to copy from.
@@ -2339,22 +2546,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0;
assert(NodeToMatch->getNumOperands() >= FirstOpToCopy &&
"Invalid variadic node");
- // Copy all of the variadic operands, not including a potential flag
+ // Copy all of the variadic operands, not including a potential glue
// input.
for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands();
i != e; ++i) {
SDValue V = NodeToMatch->getOperand(i);
- if (V.getValueType() == MVT::Flag) break;
+ if (V.getValueType() == MVT::Glue) break;
Ops.push_back(V);
}
}
-
- // If this has chain/flag inputs, add them.
+
+ // If this has chain/glue inputs, add them.
if (EmitNodeInfo & OPFL_Chain)
Ops.push_back(InputChain);
- if ((EmitNodeInfo & OPFL_FlagInput) && InputFlag.getNode() != 0)
- Ops.push_back(InputFlag);
-
+ if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != 0)
+ Ops.push_back(InputGlue);
+
// Create the node.
SDNode *Res = 0;
if (Opcode != OPC_MorphNodeTo) {
@@ -2362,28 +2569,29 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// add the results to the RecordedNodes list.
Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),
VTList, Ops.data(), Ops.size());
-
- // Add all the non-flag/non-chain results to the RecordedNodes list.
+
+ // Add all the non-glue/non-chain results to the RecordedNodes list.
for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
- if (VTs[i] == MVT::Other || VTs[i] == MVT::Flag) break;
- RecordedNodes.push_back(SDValue(Res, i));
+ if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break;
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i),
+ (SDNode*) 0));
}
-
+
} else {
Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(),
EmitNodeInfo);
}
-
- // If the node had chain/flag results, update our notion of the current
- // chain and flag.
- if (EmitNodeInfo & OPFL_FlagOutput) {
- InputFlag = SDValue(Res, VTs.size()-1);
+
+ // If the node had chain/glue results, update our notion of the current
+ // chain and glue.
+ if (EmitNodeInfo & OPFL_GlueOutput) {
+ InputGlue = SDValue(Res, VTs.size()-1);
if (EmitNodeInfo & OPFL_Chain)
InputChain = SDValue(Res, VTs.size()-2);
} else if (EmitNodeInfo & OPFL_Chain)
InputChain = SDValue(Res, VTs.size()-1);
- // If the OPFL_MemRefs flag is set on this node, slap all of the
+ // If the OPFL_MemRefs flag is set on this node, slap all of the
// accumulated memrefs onto it.
//
// FIXME: This is vastly incorrect for patterns with multiple outputs
@@ -2396,37 +2604,37 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
cast<MachineSDNode>(Res)
->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size());
}
-
+
DEBUG(errs() << " "
<< (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")
<< " node: "; Res->dump(CurDAG); errs() << "\n");
-
+
// If this was a MorphNodeTo then we're completely done!
if (Opcode == OPC_MorphNodeTo) {
- // Update chain and flag uses.
- UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched,
- InputFlag, FlagResultNodesMatched, true);
+ // Update chain and glue uses.
+ UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+ InputGlue, GlueResultNodesMatched, true);
return Res;
}
-
+
continue;
}
-
- case OPC_MarkFlagResults: {
+
+ case OPC_MarkGlueResults: {
unsigned NumNodes = MatcherTable[MatcherIndex++];
-
- // Read and remember all the flag-result nodes.
+
+ // Read and remember all the glue-result nodes.
for (unsigned i = 0; i != NumNodes; ++i) {
unsigned RecNo = MatcherTable[MatcherIndex++];
if (RecNo & 128)
RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- FlagResultNodesMatched.push_back(RecordedNodes[RecNo].getNode());
+ GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
}
continue;
}
-
+
case OPC_CompleteMatch: {
// The match has been completed, and any new nodes (if any) have been
// created. Patch up references to the matched dag to use the newly
@@ -2437,13 +2645,13 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
unsigned ResSlot = MatcherTable[MatcherIndex++];
if (ResSlot & 128)
ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
-
+
assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame");
- SDValue Res = RecordedNodes[ResSlot];
-
+ SDValue Res = RecordedNodes[ResSlot].first;
+
assert(i < NodeToMatch->getNumValues() &&
NodeToMatch->getValueType(i) != MVT::Other &&
- NodeToMatch->getValueType(i) != MVT::Flag &&
+ NodeToMatch->getValueType(i) != MVT::Glue &&
"Invalid number of results to complete!");
assert((NodeToMatch->getValueType(i) == Res.getValueType() ||
NodeToMatch->getValueType(i) == MVT::iPTR ||
@@ -2454,24 +2662,23 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);
}
- // If the root node defines a flag, add it to the flag nodes to update
- // list.
- if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Flag)
- FlagResultNodesMatched.push_back(NodeToMatch);
-
- // Update chain and flag uses.
- UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched,
- InputFlag, FlagResultNodesMatched, false);
-
+ // If the root node defines glue, add it to the glue nodes to update list.
+ if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Glue)
+ GlueResultNodesMatched.push_back(NodeToMatch);
+
+ // Update chain and glue uses.
+ UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+ InputGlue, GlueResultNodesMatched, false);
+
assert(NodeToMatch->use_empty() &&
"Didn't replace all uses of the node?");
-
+
// FIXME: We just return here, which interacts correctly with SelectRoot
// above. We should fix this to not return an SDNode* anymore.
return 0;
}
}
-
+
// If the code reached this point, then the match failed. See if there is
// another child to try in the current 'Scope', otherwise pop it until we
// find a case to check.
@@ -2494,15 +2701,15 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size())
MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
MatcherIndex = LastScope.FailIndex;
-
+
DEBUG(errs() << " Continuing at " << MatcherIndex << "\n");
-
+
InputChain = LastScope.InputChain;
- InputFlag = LastScope.InputFlag;
+ InputGlue = LastScope.InputGlue;
if (!LastScope.HasChainNodesMatched)
ChainNodesMatched.clear();
- if (!LastScope.HasFlagResultNodesMatched)
- FlagResultNodesMatched.clear();
+ if (!LastScope.HasGlueResultNodesMatched)
+ GlueResultNodesMatched.clear();
// Check to see what the offset is at the new MatcherIndex. If it is zero
// we have reached the end of this scope, otherwise we have another child
@@ -2517,21 +2724,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
LastScope.FailIndex = MatcherIndex+NumToSkip;
break;
}
-
+
// End of this scope, pop it and try the next child in the containing
// scope.
MatchScopes.pop_back();
}
}
}
-
+
void SelectionDAGISel::CannotYetSelect(SDNode *N) {
std::string msg;
raw_string_ostream Msg(msg);
- Msg << "Cannot yet select: ";
-
+ Msg << "Cannot select: ";
+
if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN &&
N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
N->getOpcode() != ISD::INTRINSIC_VOID) {
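[Editor's note] Before the printer and lowering diffs below, one orienting remark: the bulk of this file's churn is the rename of MVT::Flag to MVT::Glue, which better describes the semantics. The value carries no data; it only glues two nodes together so the scheduler cannot separate them. A minimal sketch of the idiom under its new names (the variable and register names are the editor's, not from the patch):

// Sketch: copy a value into a physical register and glue the consumer to it.
SDValue Chain = CurDAG->getEntryNode();
SDValue Glue;                                   // empty before the first copy
Chain = CurDAG->getCopyToReg(Chain, dl, DestPhysReg, Val, Glue);
Glue = Chain.getValue(1);                       // the MVT::Glue result
// A node built with Glue as its last operand is now inseparable from the
// CopyToReg during scheduling.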
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 8313de5..76eb945 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -93,7 +93,7 @@ namespace llvm {
static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
SDValue Op = EI.getNode()->getOperand(EI.getOperand());
EVT VT = Op.getValueType();
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return "color=red,style=bold";
else if (VT == MVT::Other)
return "color=blue,style=dashed";
@@ -273,14 +273,14 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
raw_string_ostream O(s);
O << "SU(" << SU->NodeNum << "): ";
if (SU->getNode()) {
- SmallVector<SDNode *, 4> FlaggedNodes;
- for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
- FlaggedNodes.push_back(N);
- while (!FlaggedNodes.empty()) {
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
O << DOTGraphTraits<SelectionDAG*>
- ::getSimpleNodeLabel(FlaggedNodes.back(), DAG);
- FlaggedNodes.pop_back();
- if (!FlaggedNodes.empty())
+ ::getSimpleNodeLabel(GluedNodes.back(), DAG);
+ GluedNodes.pop_back();
+ if (!GluedNodes.empty())
O << "\n ";
}
} else {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b74f600..691390e 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -28,6 +28,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include <cctype>
using namespace llvm;
namespace llvm {
@@ -530,7 +531,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
}
-
+
// These operations default to expand.
setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
@@ -538,8 +539,8 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
// Most targets ignore the @llvm.prefetch intrinsic.
setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
-
- // ConstantFP nodes default to expand. Targets can either change this to
+
+ // ConstantFP nodes default to expand. Targets can either change this to
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
// to optimize expansions for certain constants.
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
@@ -560,18 +561,21 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
-
+
IsLittleEndian = TD->isLittleEndian();
ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());
memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
+ maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize
+ = maxStoresPerMemmoveOptSize = 4;
benefitFromCodePlacementOpt = false;
UseUnderscoreSetJmp = false;
UseUnderscoreLongJmp = false;
SelectIsExpensive = false;
IntDivIsCheap = false;
Pow2DivIsCheap = false;
+ JumpIsExpensive = false;
StackPointerRegisterToSaveRestore = 0;
ExceptionPointerRegister = 0;
ExceptionSelectorRegister = 0;
@@ -617,16 +621,16 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
// Figure out the right, legal destination reg to copy into.
unsigned NumElts = VT.getVectorNumElements();
MVT EltTy = VT.getVectorElementType();
-
+
unsigned NumVectorRegs = 1;
-
- // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
// could break down into LHS/RHS like LegalizeDAG does.
if (!isPowerOf2_32(NumElts)) {
NumVectorRegs = NumElts;
NumElts = 1;
}
-
+
// Divide the input until we get to a supported size. This will always
// end with a scalar if the target doesn't support vectors.
while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
@@ -635,7 +639,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
}
NumIntermediates = NumVectorRegs;
-
+
MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
if (!TLI->isTypeLegal(NewVT))
NewVT = EltTy;
@@ -645,7 +649,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
RegisterVT = DestVT;
if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
-
+
// Otherwise, promotion or legal types use the same number of registers as
// the vector decimated to the appropriate level.
return NumVectorRegs;
@@ -750,7 +754,7 @@ void TargetLowering::computeRegisterProperties() {
RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
TransformToType[MVT::ppcf128] = MVT::f64;
ValueTypeActions.setTypeAction(MVT::ppcf128, Expand);
- }
+ }
// Decide how to handle f64. If the target does not have native f64 support,
// expand it to i64 and we will be generating soft float library calls.
@@ -776,13 +780,13 @@ void TargetLowering::computeRegisterProperties() {
ValueTypeActions.setTypeAction(MVT::f32, Expand);
}
}
-
+
// Loop over all of the vector value types to see which need transformations.
for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT VT = (MVT::SimpleValueType)i;
if (isTypeLegal(VT)) continue;
-
+
// Determine if there is a legal wider type. If so, we should promote to
// that wider vector type.
EVT EltVT = VT.getVectorElementType();
@@ -792,8 +796,8 @@ void TargetLowering::computeRegisterProperties() {
for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
EVT SVT = (MVT::SimpleValueType)nVT;
if (SVT.getVectorElementType() == EltVT &&
- SVT.getVectorNumElements() > NElts &&
- isTypeSynthesizable(SVT)) {
+ SVT.getVectorNumElements() > NElts &&
+ isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
@@ -804,7 +808,7 @@ void TargetLowering::computeRegisterProperties() {
}
if (IsLegalWiderType) continue;
}
-
+
MVT IntermediateVT;
EVT RegisterVT;
unsigned NumIntermediates;
@@ -812,7 +816,7 @@ void TargetLowering::computeRegisterProperties() {
getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
RegisterVT, this);
RegisterTypeForVT[i] = RegisterVT;
-
+
EVT NVT = VT.getPow2VectorType();
if (NVT == VT) {
// Type is already a power of 2. The default action is to split.
@@ -865,7 +869,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
unsigned &NumIntermediates,
EVT &RegisterVT) const {
unsigned NumElts = VT.getVectorNumElements();
-
+
// If there is a wider vector type with the same element type as this one,
// we should widen to that legal vector type. This handles things like
// <2 x float> -> <4 x float>.
@@ -877,19 +881,19 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
return 1;
}
}
-
+
// Figure out the right, legal destination reg to copy into.
EVT EltTy = VT.getVectorElementType();
-
+
unsigned NumVectorRegs = 1;
-
- // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
// could break down into LHS/RHS like LegalizeDAG does.
if (!isPowerOf2_32(NumElts)) {
NumVectorRegs = NumElts;
NumElts = 1;
}
-
+
// Divide the input until we get to a supported size. This will always
// end with a scalar if the target doesn't support vectors.
while (NumElts > 1 && !isTypeLegal(
@@ -899,7 +903,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
}
NumIntermediates = NumVectorRegs;
-
+
EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
if (!isTypeLegal(NewVT))
NewVT = EltTy;
@@ -909,13 +913,13 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
RegisterVT = DestVT;
if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
-
+
// Otherwise, promotion or legal types use the same number of registers as
// the vector decimated to the appropriate level.
return NumVectorRegs;
}
-/// Get the EVTs and ArgFlags collections that represent the legalized return
+/// Get the EVTs and ArgFlags collections that represent the legalized return
/// type of the given function. This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
/// TODO: Move this out of TargetLowering.cpp.
@@ -988,11 +992,11 @@ unsigned TargetLowering::getJumpTableEncoding() const {
// In non-pic modes, just use the address of a block.
if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
return MachineJumpTableInfo::EK_BlockAddress;
-
+
// In PIC mode, if the target supports a GPRel32 directive, use it.
if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0)
return MachineJumpTableInfo::EK_GPRel32BlockAddress;
-
+
// Otherwise, use a label difference.
return MachineJumpTableInfo::EK_LabelDifference32;
}
@@ -1036,11 +1040,11 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// Optimization Methods
//===----------------------------------------------------------------------===//
-/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
/// specified instruction is a constant integer. If so, check to see if there
/// are any bits set in the constant that are not demanded. If so, shrink the
/// constant and return true.
-bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
const APInt &Demanded) {
DebugLoc dl = Op.getDebugLoc();
@@ -1062,7 +1066,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
EVT VT = Op.getValueType();
SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
DAG.getConstant(Demanded &
- C->getAPIntValue(),
+ C->getAPIntValue(),
VT));
return CombineTo(Op, New);
}
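[Editor's note] The shrink is easiest to trust with a brute-force check. The following stand-alone program (editor's illustration, not part of the patch) verifies the OR case on a 16-bit slice of the input space: masking the constant down to the demanded bits never changes a demanded result bit.

#include <cassert>
#include <cstdint>
int main() {
  const uint32_t Demanded = 0x000000FFu;   // caller only wants the low byte
  const uint32_t C        = 0x00FF00FFu;   // constant with undemanded bits set
  const uint32_t Shrunk   = C & Demanded;  // what ShrinkDemandedConstant emits
  for (uint32_t x = 0; x <= 0xFFFFu; ++x)
    assert((((x | C) ^ (x | Shrunk)) & Demanded) == 0);
  return 0;
}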
@@ -1139,9 +1143,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownZero = KnownOne = APInt(BitWidth, 0);
// Other users may use these bits.
- if (!Op.getNode()->hasOneUse()) {
+ if (!Op.getNode()->hasOneUse()) {
if (Depth != 0) {
- // If not at the root, Just compute the KnownZero/KnownOne bits to
+ // If not at the root, just compute the KnownZero/KnownOne bits to
// simplify things downstream.
TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
return false;
@@ -1149,7 +1153,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If this is the root being simplified, allow it to have multiple uses,
// just set the NewMask to all bits.
NewMask = APInt::getAllOnesValue(BitWidth);
- } else if (DemandedMask == 0) {
+ } else if (DemandedMask == 0) {
// Not demanding any bits from Op.
if (Op.getOpcode() != ISD::UNDEF)
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
@@ -1172,8 +1176,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// the RHS.
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
APInt LHSZero, LHSOne;
+ // Do not increment Depth here; that can cause an infinite loop.
TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
- LHSZero, LHSOne, Depth+1);
+ LHSZero, LHSOne, Depth);
// If the LHS already has zeros where RHSC does, this and is dead.
if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
return TLO.CombineTo(Op, Op.getOperand(0));
@@ -1182,16 +1187,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
return true;
}
-
+
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
KnownZero2, KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
@@ -1214,15 +1219,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownZero |= KnownZero2;
break;
case ISD::OR:
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
KnownZero2, KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
@@ -1248,15 +1253,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownOne |= KnownOne2;
break;
case ISD::XOR:
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
if ((KnownZero & NewMask) == NewMask)
@@ -1274,12 +1279,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
Op.getOperand(0),
Op.getOperand(1)));
-
+
// Output known-0 bits are known if clear or set in both the LHS & RHS.
KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
-
+
// If all of the demanded bits on one side are known, and all of the set
// bits on that side are also known to be set on the other side, turn this
// into an AND, as we know the bits will be cleared.
@@ -1288,11 +1293,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if ((KnownOne & KnownOne2) == KnownOne) {
EVT VT = Op.getValueType();
SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
Op.getOperand(0), ANDC));
}
}
-
+
// If the RHS is a constant, see if we can simplify it.
// for XOR, we prefer to force bits to 1 if they will make a -1.
// if we can't force bits, try to shrink constant
@@ -1317,37 +1322,37 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownOne = KnownOneOut;
break;
case ISD::SELECT:
- if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If the operands are constants, see if we can simplify them.
if (TLO.ShrinkDemandedConstant(Op, NewMask))
return true;
-
+
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
break;
case ISD::SELECT_CC:
- if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
+ if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If the operands are constants, see if we can simplify them.
if (TLO.ShrinkDemandedConstant(Op, NewMask))
return true;
-
+
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
@@ -1373,16 +1378,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SRL;
- }
-
- SDValue NewSA =
+ }
+
+ SDValue NewSA =
TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
EVT VT = Op.getValueType();
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
InOp.getOperand(0), NewSA));
}
- }
-
+ }
+
if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt),
KnownZero, KnownOne, TLO, Depth+1))
return true;
@@ -1421,7 +1426,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
unsigned ShAmt = SA->getZExtValue();
unsigned VTSize = VT.getSizeInBits();
SDValue InOp = Op.getOperand(0);
-
+
// If the shift count is an invalid immediate, don't do anything.
if (ShAmt >= BitWidth)
break;
@@ -1438,20 +1443,20 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SHL;
- }
-
+ }
+
SDValue NewSA =
TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
InOp.getOperand(0), NewSA));
}
- }
-
+ }
+
// Compute the new bits that are at the top now.
if (SimplifyDemandedBits(InOp, (NewMask << ShAmt),
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
@@ -1472,7 +1477,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
EVT VT = Op.getValueType();
unsigned ShAmt = SA->getZExtValue();
-
+
// If the shift count is an invalid immediate, don't do anything.
if (ShAmt >= BitWidth)
break;
@@ -1484,21 +1489,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
if (HighBits.intersects(NewMask))
InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits());
-
+
if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
-
+
// Handle the sign bit, adjusted to where it is now in the mask.
APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);
-
+
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
Op.getOperand(0),
Op.getOperand(1)));
} else if (KnownOne.intersects(SignBit)) { // New bits are known one.
@@ -1509,23 +1514,23 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::SIGN_EXTEND_INREG: {
EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- // Sign extension. Compute the demanded bits in the result that are not
+ // Sign extension. Compute the demanded bits in the result that are not
// present in the input.
APInt NewBits =
APInt::getHighBitsSet(BitWidth,
BitWidth - EVT.getScalarType().getSizeInBits());
-
+
// If none of the extended bits are demanded, eliminate the sextinreg.
if ((NewBits & NewMask) == 0)
return TLO.CombineTo(Op, Op.getOperand(0));
- APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits());
- InSignBit.zext(BitWidth);
+ APInt InSignBit =
+ APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth);
APInt InputDemandedBits =
APInt::getLowBitsSet(BitWidth,
EVT.getScalarType().getSizeInBits()) &
NewMask;
-
+
// Since the sign extended bits are demanded, we know that the sign
// bit is demanded.
InputDemandedBits |= InSignBit;
@@ -1533,16 +1538,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
-
+
// If the input sign bit is known zero, convert this into a zero extension.
if (KnownZero.intersects(InSignBit))
- return TLO.CombineTo(Op,
+ return TLO.CombineTo(Op,
TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT));
-
+
if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
KnownOne |= NewBits;
KnownZero &= ~NewBits;
@@ -1555,23 +1560,22 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::ZERO_EXTEND: {
unsigned OperandBitWidth =
Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
- APInt InMask = NewMask;
- InMask.trunc(OperandBitWidth);
-
+ APInt InMask = NewMask.trunc(OperandBitWidth);
+
// If none of the top bits are demanded, convert this into an any_extend.
APInt NewBits =
APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
if (!NewBits.intersects(NewMask))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
- Op.getValueType(),
+ Op.getValueType(),
Op.getOperand(0)));
-
+
if (SimplifyDemandedBits(Op.getOperand(0), InMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
KnownZero |= NewBits;
break;
}
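[Editor's note] Hunks like this one track an APInt API change: zext and trunc used to resize in place, and now return the resized value, so every `KnownZero.zext(BitWidth);` becomes an assignment. A one-function reminder of the new contract (editor's sketch, assuming the post-change llvm::APInt):

#include "llvm/ADT/APInt.h"
// Post-change contract: resizing returns the resized value and leaves the
// receiver untouched, so the result must be assigned back.
llvm::APInt widenSketch(const llvm::APInt &V, unsigned BitWidth) {
  return V.zext(BitWidth);
}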
@@ -1581,31 +1585,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
APInt NewBits = ~InMask & NewMask;
-
+
// If none of the top bits are demanded, convert this into an any_extend.
if (NewBits == 0)
return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
Op.getValueType(),
Op.getOperand(0)));
-
+
// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
APInt InDemandedBits = InMask & NewMask;
InDemandedBits |= InSignBit;
- InDemandedBits.trunc(InBits);
-
- if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
+ InDemandedBits = InDemandedBits.trunc(InBits);
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
KnownOne, TLO, Depth+1))
return true;
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
-
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+
// If the sign bit is known zero, convert this to a zero extend.
if (KnownZero.intersects(InSignBit))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
- Op.getValueType(),
+ Op.getValueType(),
Op.getOperand(0)));
-
+
// If the sign bit is known one, the top bits match.
if (KnownOne.intersects(InSignBit)) {
KnownOne |= NewBits;
@@ -1619,14 +1623,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::ANY_EXTEND: {
unsigned OperandBitWidth =
Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
- APInt InMask = NewMask;
- InMask.trunc(OperandBitWidth);
+ APInt InMask = NewMask.trunc(OperandBitWidth);
if (SimplifyDemandedBits(Op.getOperand(0), InMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
break;
}
case ISD::TRUNCATE: {
@@ -1634,14 +1637,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// zero/one bits live out.
unsigned OperandBitWidth =
Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
- APInt TruncMask = NewMask;
- TruncMask.zext(OperandBitWidth);
+ APInt TruncMask = NewMask.zext(OperandBitWidth);
if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- KnownZero.trunc(BitWidth);
- KnownOne.trunc(BitWidth);
-
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+
// If the input is only used by this truncate, see if we can shrink it based
// on the known demanded bits.
if (Op.getOperand(0).getNode()->hasOneUse()) {
@@ -1661,25 +1663,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
OperandBitWidth - BitWidth);
- HighBits = HighBits.lshr(ShAmt->getZExtValue());
- HighBits.trunc(BitWidth);
+ HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth);
if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
- Op.getValueType(),
+ Op.getValueType(),
In.getOperand(0));
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
Op.getValueType(),
- NewTrunc,
+ NewTrunc,
In.getOperand(1)));
}
break;
}
}
-
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
break;
}
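[Editor's note] The truncate-of-shift narrowing above is worth a concrete case. For (i16 trunc (srl (i32 x), 4)) with only the low byte demanded, bits 0..7 of the result equal bits 4..11 of x whether the truncate happens before or after the shift, and the bits that differ come from x's high half, which the mask excludes. A stand-alone check of that claim (editor's illustration, not from the patch):

#include <cassert>
#include <cstdint>
int main() {
  for (uint64_t x = 0; x < (1u << 20); x += 7) {
    uint16_t before = (uint16_t)(((uint32_t)x) >> 4); // trunc(srl x, 4)
    uint16_t after  = (uint16_t)((uint16_t)x >> 4);   // srl(trunc x, 4)
    assert(((before ^ after) & 0xFF) == 0);           // low 8 bits agree
  }
  return 0;
}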
case ISD::AssertZext: {
@@ -1689,7 +1690,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (SimplifyDemandedBits(Op.getOperand(0), NewMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth,
@@ -1697,7 +1698,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownZero |= ~InMask & NewMask;
break;
}
- case ISD::BIT_CONVERT:
+ case ISD::BITCAST:
#if 0
// If this is an FP->Int bitcast and if the sign bit is the only thing that
// is demanded, turn this into a FGETSIGN.
@@ -1709,7 +1710,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
isOperationLegal(ISD::FGETSIGN, Op.getValueType())) {
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
// place. We expect the SHL to be eliminated by other optimizations.
- SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),
Op.getOperand(0));
unsigned ShVal = Op.getValueType().getSizeInBits()-1;
SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy());
@@ -1742,21 +1743,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
break;
}
-
+
// If we know the value of all of the demanded bits, return this as a
// constant.
if ((NewMask & (KnownZero|KnownOne)) == NewMask)
return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
-
+
return false;
}
-/// computeMaskedBitsForTargetNode - Determine which of the bits specified
-/// in Mask are known to be either zero or one and return them in the
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified
+/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
-void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
const APInt &Mask,
- APInt &KnownZero,
+ APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
@@ -1817,7 +1818,7 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
(KnownOne.countPopulation() == 1);
}
-/// SimplifySetCC - Try to simplify a setcc built with the specified operands
+/// SimplifySetCC - Try to simplify a setcc built with the specified operands
/// and cc. If it is unable to simplify it, return a null SDValue.
SDValue
TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
@@ -1869,6 +1870,30 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
+ SDValue CTPOP = N0;
+ // Look through truncs that don't change the value of a ctpop.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
+ CTPOP = N0.getOperand(0);
+
+ if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
+ (N0 == CTPOP || N0.getValueType().getSizeInBits() >
+ Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) {
+ EVT CTVT = CTPOP.getValueType();
+ SDValue CTOp = CTPOP.getOperand(0);
+
+ // (ctpop x) u< 2 -> (x & x-1) == 0
+ // (ctpop x) u> 1 -> (x & x-1) != 0
+ if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
+ DAG.getConstant(1, CTVT));
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
+ ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
+ return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC);
+ }
+
+ // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
+ }
+
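[Editor's note] The identity behind the new combine just added: x & (x-1) clears the lowest set bit of x, so it is zero exactly when x has at most one bit set, i.e. when popcount(x) is less than 2. A quick exhaustive check over 16 bits (editor's illustration; __builtin_popcount assumes a GCC or Clang compiler):

#include <cassert>
#include <cstdint>
int main() {
  for (uint32_t x = 0; x < (1u << 16); ++x) {
    unsigned pop = __builtin_popcount(x);
    assert(((x & (x - 1)) == 0) == (pop < 2)); // (ctpop x) u< 2 form
  }
  return 0;
}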
// If the LHS is '(and load, const)', the RHS is 0,
// the test is for equality or unsigned, and all 1 bits of the const are
// in the same partial word, see if we can shorten the load.
@@ -1884,7 +1909,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (!Lod->isVolatile() && Lod->isUnindexed()) {
unsigned origWidth = N0.getValueType().getSizeInBits();
unsigned maskWidth = origWidth;
- // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
+ // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
// 8 bits, but have to be careful...
if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
origWidth = Lod->getMemoryVT().getSizeInBits();
@@ -1916,10 +1941,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
DAG.getConstant(bestOffset, PtrType));
unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
- Lod->getSrcValue(),
- Lod->getSrcValueOffset() + bestOffset,
+ Lod->getPointerInfo().getWithOffset(bestOffset),
false, false, NewAlign);
- return DAG.getSetCC(dl, VT,
+ return DAG.getSetCC(dl, VT,
DAG.getNode(ISD::AND, dl, newVT, NewLoad,
DAG.getConstant(bestMask.trunc(bestWidth),
newVT)),
@@ -1969,7 +1993,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(isOperationLegal(ISD::SETCC, newVT) &&
getCondCodeAction(Cond, newVT)==Legal))
return DAG.getSetCC(dl, VT, N0.getOperand(0),
- DAG.getConstant(APInt(C1).trunc(InSize), newVT),
+ DAG.getConstant(C1.trunc(InSize), newVT),
Cond);
break;
}
@@ -1987,7 +2011,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// the sign extension, it is impossible for both sides to be equal.
if (C1.getMinSignedBits() > ExtSrcTyBits)
return DAG.getConstant(Cond == ISD::SETNE, VT);
-
+
SDValue ZextOp;
EVT Op0Ty = N0.getOperand(0).getValueType();
if (Op0Ty == ExtSrcTy) {
@@ -2000,10 +2024,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(ZextOp.getNode());
// Otherwise, make this a use of a zext.
- return DAG.getSetCC(dl, VT, ZextOp,
+ return DAG.getSetCC(dl, VT, ZextOp,
DAG.getConstant(C1 & APInt::getLowBitsSet(
ExtDstTyBits,
- ExtSrcTyBits),
+ ExtSrcTyBits),
ExtDstTy),
Cond);
} else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
@@ -2013,16 +2037,16 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
if (TrueWhenTrue)
- return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
// Invert the condition.
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- CC = ISD::getSetCCInverse(CC,
+ CC = ISD::getSetCCInverse(CC,
N0.getOperand(0).getValueType().isInteger());
return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
}
if ((N0.getOpcode() == ISD::XOR ||
- (N0.getOpcode() == ISD::AND &&
+ (N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::XOR &&
N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
isa<ConstantSDNode>(N0.getOperand(1)) &&
@@ -2038,7 +2062,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (N0.getOpcode() == ISD::XOR)
Val = N0.getOperand(0);
else {
- assert(N0.getOpcode() == ISD::AND &&
+ assert(N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::XOR);
// ((X^1)&1)^1 -> X & 1
Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
@@ -2082,7 +2106,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
}
-
+
APInt MinVal, MaxVal;
unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
if (ISD::isSignedIntSetCC(Cond)) {
@@ -2097,7 +2121,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true
// X >= C0 --> X > (C0-1)
- return DAG.getSetCC(dl, VT, N0,
+ return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(C1-1, N1.getValueType()),
(Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
}
@@ -2105,7 +2129,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true
// X <= C0 --> X < (C0+1)
- return DAG.getSetCC(dl, VT, N0,
+ return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(C1+1, N1.getValueType()),
(Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
}
@@ -2128,12 +2152,12 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If we have setult X, 1, turn it into seteq X, 0
if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(MinVal, N0.getValueType()),
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MinVal, N0.getValueType()),
ISD::SETEQ);
// If we have setugt X, Max-1, turn it into seteq X, Max
else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
- return DAG.getSetCC(dl, VT, N0,
+ return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(MaxVal, N0.getValueType()),
ISD::SETEQ);
@@ -2141,9 +2165,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// by changing cc.
// SETUGT X, SINTMAX -> SETLT X, 0
- if (Cond == ISD::SETUGT &&
+ if (Cond == ISD::SETUGT &&
C1 == APInt::getSignedMaxValue(OperandBitSize))
- return DAG.getSetCC(dl, VT, N0,
+ return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(0, N1.getValueType()),
ISD::SETLT);
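
// Host-side sketch (not from the patch) of the SETUGT X, SINTMAX rewrite:
// for 32-bit X, (X u> 0x7fffffff) holds exactly when the sign bit of X is
// set, which is the same as (X s< 0).
#include <cassert>
#include <cstdint>
int main() {
  for (uint32_t X : {0u, 5u, 0x7fffffffu, 0x80000000u, 0xffffffffu})
    assert((X > 0x7fffffffu) == (static_cast<int32_t>(X) < 0));
  return 0;
}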
@@ -2203,7 +2227,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getUNDEF(VT);
}
}
-
+
// Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
// constant if knowing that the operand is non-nan is enough. We prefer to
// have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
@@ -2278,14 +2302,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (DAG.isCommutativeBinOp(N0.getOpcode())) {
// If X op Y == Y op X, try other combinations.
if (N0.getOperand(0) == N1.getOperand(1))
- return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
Cond);
if (N0.getOperand(1) == N1.getOperand(0))
- return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
Cond);
}
}
-
+
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
// Turn (X+C1) == C2 --> X == C2-C1
@@ -2295,7 +2319,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
LHSR->getAPIntValue(),
N0.getValueType()), Cond);
}
-
+
// Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
if (N0.getOpcode() == ISD::XOR)
// If we know that all of the inverted bits are zero, don't bother
@@ -2308,7 +2332,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getValueType()),
Cond);
}
-
+
// Turn (C1-X) == C2 --> X == C1-C2
if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
@@ -2319,7 +2343,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getValueType()),
Cond);
}
- }
+ }
}
// Simplify (X+Z) == X --> Z == 0
@@ -2334,7 +2358,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
// (Z-X) == X --> Z == X<<1
SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
- N1,
+ N1,
DAG.getConstant(1, getShiftAmountTy()));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(SH.getNode());
@@ -2356,7 +2380,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
} else if (N1.getNode()->hasOneUse()) {
assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
// X == (Z-X) --> X<<1 == Z
- SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
DAG.getConstant(1, getShiftAmountTy()));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(SH.getNode());
@@ -2443,7 +2467,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
-bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
+bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
int64_t &Offset) const {
if (isa<GlobalAddressSDNode>(N)) {
GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
@@ -2469,6 +2493,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
}
}
}
+
return false;
}
@@ -2497,7 +2522,10 @@ TargetLowering::getConstraintType(const std::string &Constraint) const {
return C_Memory;
case 'i': // Simple Integer or Relocatable Constant
case 'n': // Simple Integer
+ case 'E': // Floating Point Constant
+ case 'F': // Floating Point Constant
case 's': // Relocatable Constant
+ case 'p': // Address.
case 'X': // Allow ANY value.
case 'I': // Target registers.
case 'J':
@@ -2507,11 +2535,13 @@ TargetLowering::getConstraintType(const std::string &Constraint) const {
case 'N':
case 'O':
case 'P':
+ case '<':
+ case '>':
return C_Other;
}
}
-
- if (Constraint.size() > 1 && Constraint[0] == '{' &&
+
+ if (Constraint.size() > 1 && Constraint[0] == '{' &&
Constraint[Constraint.size()-1] == '}')
return C_Register;
return C_Unknown;
@@ -2550,7 +2580,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// is possible and fine if either GV or C are missing.
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
-
+
// If we have "(add GV, C)", pull out GV/C
if (Op.getOpcode() == ISD::ADD) {
C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
@@ -2562,14 +2592,14 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
if (C == 0 || GA == 0)
C = 0, GA = 0;
}
-
+
// If we find a valid operand, map to the TargetXXX version so that the
// value itself doesn't get selected.
if (GA) { // Either &GV or &GV+C
if (ConstraintLetter != 'n') {
int64_t Offs = GA->getOffset();
if (C) Offs += C->getZExtValue();
- Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
C ? C->getDebugLoc() : DebugLoc(),
Op.getValueType(), Offs));
return;
@@ -2613,8 +2643,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
E = RI->regclass_end(); RCI != E; ++RCI) {
const TargetRegisterClass *RC = *RCI;
-
- // If none of the value types for this register class are valid, we
+
+ // If none of the value types for this register class are valid, we
// can't use it. For example, 64-bit reg classes on 32-bit targets.
bool isLegal = false;
for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
@@ -2624,16 +2654,16 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
break;
}
}
-
+
if (!isLegal) continue;
-
- for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I) {
if (RegName.equals_lower(RI->getName(*I)))
return std::make_pair(*I, RC);
}
}
-
+
return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
}
@@ -2655,6 +2685,186 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
}
+/// ParseConstraints - Split up the constraint string from the inline
+/// assembly value into the specific constraints and their prefixes,
+/// and also tie in the associated operand values.
+/// If this returns an empty vector and the constraint string itself
+/// isn't empty, there was an error parsing.
+TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
+ ImmutableCallSite CS) const {
+ /// ConstraintOperands - Information about all of the constraints.
+ AsmOperandInfoVector ConstraintOperands;
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+ unsigned maCount = 0; // Largest number of multiple alternative constraints.
+
+ // Do a prepass over the constraints, canonicalizing them, and building up the
+ // ConstraintOperands list.
+ InlineAsm::ConstraintInfoVector
+ ConstraintInfos = IA->ParseConstraints();
+
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
+
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i]));
+ AsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ // Update multiple alternative constraint count.
+ if (OpInfo.multipleAlternatives.size() > maCount)
+ maCount = OpInfo.multipleAlternatives.size();
+
+ OpInfo.ConstraintVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+ if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpInfo.ConstraintVT = getValueType(CS.getType());
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ if (OpInfo.CallOperandVal) {
+ const llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
+ if (OpInfo.isIndirect) {
+ const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ if (!PtrTy)
+ report_fatal_error("Indirect operand for inline asm not a pointer!");
+ OpTy = PtrTy->getElementType();
+ }
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpInfo.ConstraintVT =
+ EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+ break;
+ }
+ } else if (isa<PointerType>(OpTy)) {
+ OpInfo.ConstraintVT = MVT::getIntegerVT(8*TD->getPointerSize());
+ } else {
+ OpInfo.ConstraintVT = EVT::getEVT(OpTy, true);
+ }
+ }
+ }
+
+ // If we have multiple alternative constraints, select the best alternative.
+ if (ConstraintInfos.size()) {
+ if (maCount) {
+ unsigned bestMAIndex = 0;
+ int bestWeight = -1;
+ // weight: -1 = invalid match; 0 = so-so match up to 5 = good match.
+ int weight = -1;
+ unsigned maIndex;
+ // Compute the sums of the weights for each alternative, keeping track
+ // of the best (highest weight) one so far.
+ for (maIndex = 0; maIndex < maCount; ++maIndex) {
+ int weightSum = 0;
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+ if (OpInfo.Type == InlineAsm::isClobber)
+ continue;
+
+ // If this is an output operand with a matching input operand,
+ // look up the matching input. If their types mismatch (e.g. one
+ // is an integer, the other is floating point) or their sizes
+ // differ, this alternative cannot match, so give it weight -1.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (OpInfo.ConstraintVT.getSizeInBits() !=
+ Input.ConstraintVT.getSizeInBits())) {
+ weightSum = -1; // Can't match.
+ break;
+ }
+ }
+ }
+ weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
+ if (weight == -1) {
+ weightSum = -1;
+ break;
+ }
+ weightSum += weight;
+ }
+ // Update best.
+ if (weightSum > bestWeight) {
+ bestWeight = weightSum;
+ bestMAIndex = maIndex;
+ }
+ }
+
+ // Now select chosen alternative in each constraint.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
+ if (cInfo.Type == InlineAsm::isClobber)
+ continue;
+ cInfo.selectAlternative(bestMAIndex);
+ }
+ }
+ }
+
+ // Check and hook up tied operands, choose constraint code to use.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (OpInfo.ConstraintVT.getSizeInBits() !=
+ Input.ConstraintVT.getSizeInBits())) {
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ }
+
+ }
+ }
+
+ return ConstraintOperands;
+}
+
+
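
// Hypothetical example (assumes an x86 host; the function name here is
// illustrative, not from the patch) of the input ParseConstraints handles.
// Clang lowers this GCC-style asm to LLVM inline asm with the constraint
// string "=r,0,r,~{cc}": one isOutput entry ("=r"), a tied isInput ("0",
// matched to output 0 and checked by the hook-up loop above), a plain
// isInput ("r"), and an isClobber ("~{cc}").
int add_via_asm(int A, int B) {
  int R;
  asm("addl %2, %0" : "=r"(R) : "0"(A), "r"(B) : "cc");
  return R;
}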
/// getConstraintGenerality - Return an integer indicating how general CT
/// is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
@@ -2672,6 +2882,79 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
}
}
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getMultipleConstraintMatchWeight(
+ AsmOperandInfo &info, int maIndex) const {
+ InlineAsm::ConstraintCodeVector *rCodes;
+ if (maIndex >= (int)info.multipleAlternatives.size())
+ rCodes = &info.Codes;
+ else
+ rCodes = &info.multipleAlternatives[maIndex].Codes;
+ ConstraintWeight BestWeight = CW_Invalid;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
+ ConstraintWeight weight =
+ getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
+ if (weight > BestWeight)
+ BestWeight = weight;
+ }
+
+ return BestWeight;
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ case 'i': // immediate integer.
+ case 'n': // immediate integer with a known value.
+ if (isa<ConstantInt>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 's': // non-explicit integral immediate.
+ if (isa<GlobalValue>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 'E': // immediate float if host format.
+ case 'F': // immediate float.
+ if (isa<ConstantFP>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case '<': // memory operand with autodecrement.
+ case '>': // memory operand with autoincrement.
+ case 'm': // memory operand.
+ case 'o': // offsettable memory operand
+ case 'V': // non-offsettable memory operand
+ weight = CW_Memory;
+ break;
+ case 'r': // general register.
+ case 'g': // general register, memory operand or immediate integer.
+ // note: Clang converts "g" to "imr".
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'X': // any operand.
+ default:
+ weight = CW_Default;
+ break;
+ }
+ return weight;
+}
+
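
// Worked example (hypothetical operand, not from the patch): for an
// alternative whose codes are "imr" and an operand that is a plain integer
// variable, the letter weights above are 'i' -> CW_Invalid (the value is
// not a ConstantInt), 'm' -> CW_Memory, and 'r' -> CW_Register, so the
// loop in getMultipleConstraintMatchWeight keeps the best of those; with
// an integer literal instead, 'i' scores CW_Constant and wins.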
/// ChooseConstraint - If there are multiple different constraints that we
/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
@@ -2721,12 +3004,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
break;
}
}
-
+
// Things with matching constraints can only be registers, per gcc
// documentation. This mainly affects "g" constraints.
if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
continue;
-
+
// This constraint letter is more general than the previous one, use it.
int Generality = getConstraintGenerality(CType);
if (Generality > BestGenerality) {
@@ -2735,7 +3018,7 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
BestGenerality = Generality;
}
}
-
+
OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
OpInfo.ConstraintType = BestType;
}
@@ -2744,10 +3027,10 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
/// type to use for the specific AsmOperandInfo, setting
/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
- SDValue Op,
+ SDValue Op,
SelectionDAG *DAG) const {
assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
-
+
// Single-letter constraints ('r') are very common.
if (OpInfo.Codes.size() == 1) {
OpInfo.ConstraintCode = OpInfo.Codes[0];
@@ -2755,7 +3038,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
} else {
ChooseConstraint(OpInfo, *this, Op, DAG);
}
-
+
// 'X' matches anything.
if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
// Labels and constants are handled elsewhere ('X' is the only thing
@@ -2766,7 +3049,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
OpInfo.CallOperandVal = v;
return;
}
-
+
// Otherwise, try to resolve it to something we know about by looking at
// the actual operand type.
if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
@@ -2782,7 +3065,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
-bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
const Type *Ty) const {
// The default implementation of this implements a conservative RISCy, r+r and
// r+i addr mode.
@@ -2790,12 +3073,12 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
// Allows a sign-extended 16-bit immediate field.
if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
return false;
-
+
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
-
- // Only support r+r,
+
+ // Only support r+r,
switch (AM.Scale) {
case 0: // "r+i" or just "i", depending on HasBaseReg.
break;
@@ -2810,7 +3093,7 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
// Allow 2*r as r+r.
break;
}
-
+
return true;
}
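
// Illustrative verdicts (not from the patch) under this default r+r/r+i
// model, phrased in terms of the AddrMode fields:
//   [r1 + 8]        Scale=0, BaseOffs=8             -> legal
//   [r1 + r2]       Scale=1, HasBaseReg             -> legal
//   [r1 + r2 + 8]   Scale=1, HasBaseReg, BaseOffs   -> rejected ("r+r+i")
//   [2*r1]          Scale=2, no base reg or offset  -> legal (folds to r+r)
//   [gv + r1]       BaseGV set                      -> rejected
//   [r1 + 0x20000]  offset outside signed 16 bits   -> rejected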
@@ -2818,19 +3101,19 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
-SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
std::vector<SDNode*>* Created) const {
EVT VT = N->getValueType(0);
DebugLoc dl= N->getDebugLoc();
-
+
// Check to see if we can do this.
// FIXME: We should be more aggressive here.
if (!isTypeLegal(VT))
return SDValue();
-
+
APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
APInt::ms magics = d.magic();
-
+
// Multiply the numerator (operand 0) by the magic value
// FIXME: We should support doing a MUL in a wider type
SDValue Q;
@@ -2844,7 +3127,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
else
return SDValue(); // No mulhs or equivalent
// If d > 0 and m < 0, add the numerator
- if (d.isStrictlyPositive() && magics.m.isNegative()) {
+ if (d.isStrictlyPositive() && magics.m.isNegative()) {
Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
if (Created)
Created->push_back(Q.getNode());
@@ -2857,7 +3140,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
}
// Shift right algebraic if shift value is nonzero
if (magics.s > 0) {
- Q = DAG.getNode(ISD::SRA, dl, VT, Q,
+ Q = DAG.getNode(ISD::SRA, dl, VT, Q,
DAG.getConstant(magics.s, getShiftAmountTy()));
if (Created)
Created->push_back(Q.getNode());
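
// Host-side sketch (not from the patch) of the magic-number scheme that
// BuildSDIV emits, specialized to signed division by 3 on i32. The
// constants (magic = 0x55555556, shift = 0) are what APInt::magic()
// computes for d = 3; MULHS is modeled with a widening 64-bit multiply.
#include <cassert>
#include <cstdint>
static int32_t sdiv3(int32_t N) {
  int32_t Q = static_cast<int32_t>(
      (static_cast<int64_t>(0x55555556) * N) >> 32);  // MULHS(N, magic)
  return Q + (static_cast<uint32_t>(Q) >> 31);        // add one if Q < 0
}
int main() {
  for (int32_t N : {-100, -7, -1, 0, 1, 7, 100})
    assert(sdiv3(N) == N / 3);
  return 0;
}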
@@ -2908,20 +3191,20 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
if (magics.a == 0) {
assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
"We shouldn't generate an undefined shift!");
- return DAG.getNode(ISD::SRL, dl, VT, Q,
+ return DAG.getNode(ISD::SRL, dl, VT, Q,
DAG.getConstant(magics.s, getShiftAmountTy()));
} else {
SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
if (Created)
Created->push_back(NPQ.getNode());
- NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
DAG.getConstant(1, getShiftAmountTy()));
if (Created)
Created->push_back(NPQ.getNode());
NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
if (Created)
Created->push_back(NPQ.getNode());
- return DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ return DAG.getNode(ISD::SRL, dl, VT, NPQ,
DAG.getConstant(magics.s-1, getShiftAmountTy()));
}
}
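
// Companion sketch (not from the patch) for the magics.a != 0 path above,
// specialized to unsigned division by 7 on i32: APInt::magicu() yields
// magic = 0x24924925 with the "add" indicator set and shift s = 3, so the
// NPQ sequence (sub, srl 1, add, srl s-1) applies.
#include <cassert>
#include <cstdint>
static uint32_t udiv7(uint32_t N) {
  uint32_t Q = static_cast<uint32_t>(
      (static_cast<uint64_t>(0x24924925u) * N) >> 32); // MULHU(N, magic)
  uint32_t NPQ = N - Q;
  NPQ = (NPQ >> 1) + Q;              // (N - Q)/2 + Q; cannot overflow
  return NPQ >> 2;                   // final srl by magics.s - 1
}
int main() {
  for (uint32_t N : {0u, 6u, 7u, 100u, 0xffffffffu})
    assert(udiv7(N) == N / 7);
  return 0;
}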