diff options
Diffstat (limited to 'contrib/llvm/lib/CodeGen/SelectionDAG')
10 files changed, 408 insertions, 239 deletions
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index eb16095..2e09ec0 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -205,6 +205,7 @@ namespace { SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); SDValue visitCTPOP(SDNode *N); SDValue visitSELECT(SDNode *N); + SDValue visitVSELECT(SDNode *N); SDValue visitSELECT_CC(SDNode *N); SDValue visitSETCC(SDNode *N); SDValue visitSIGN_EXTEND(SDNode *N); @@ -243,7 +244,6 @@ namespace { SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); - SDValue visitMEMBARRIER(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); @@ -1127,6 +1127,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); case ISD::CTPOP: return visitCTPOP(N); case ISD::SELECT: return visitSELECT(N); + case ISD::VSELECT: return visitVSELECT(N); case ISD::SELECT_CC: return visitSELECT_CC(N); case ISD::SETCC: return visitSETCC(N); case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); @@ -1165,7 +1166,6 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); - case ISD::MEMBARRIER: return visitMEMBARRIER(N); } return SDValue(); } @@ -4164,6 +4164,46 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitVSELECT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + DebugLoc DL = N->getDebugLoc(); + + // Canonicalize integer abs. + // vselect (setg[te] X, 0), X, -X -> + // vselect (setgt X, -1), X, -X -> + // vselect (setl[te] X, 0), -X, X -> + // Y = sra (X, size(X)-1); xor (add (X, Y), Y) + if (N0.getOpcode() == ISD::SETCC) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + bool isAbs = false; + bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); + + if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) || + (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) && + N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1)) + isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode()); + else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) && + N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1)) + isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); + + if (isAbs) { + EVT VT = LHS.getValueType(); + SDValue Shift = DAG.getNode( + ISD::SRA, DL, VT, LHS, + DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT)); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); + AddToWorkList(Shift.getNode()); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4453,7 +4493,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::SETCC) { // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. - if (VT.isVector() && !LegalOperations) { + if (VT.isVector() && !LegalOperations && + TLI.getBooleanContents(true) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { EVT N0VT = N0.getOperand(0).getValueType(); // On some architectures (such as SSE/NEON/etc) the SETCC result type is // of the same size as the compared operands. Only optimize sext(setcc()) @@ -7110,25 +7152,40 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() == BasePtr.getNode() && "Expected BasePtr operand"); - APInt OV = - cast<ConstantSDNode>(Offset)->getAPIntValue(); - if (AM == ISD::PRE_DEC) - OV = -OV; + // We need to replace ptr0 in the following expression: + // x0 * offset0 + y0 * ptr0 = t0 + // knowing that + // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) + // + // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the + // indexed load/store and the expresion that needs to be re-written. + // + // Therefore, we have: + // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1 ConstantSDNode *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx)); - APInt CNV = CN->getAPIntValue(); - if (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) - CNV += OV; - else - CNV -= OV; + int X0, X1, Y0, Y1; + APInt Offset0 = CN->getAPIntValue(); + APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue(); - SDValue NewOp1 = Result.getValue(isLoad ? 1 : 0); - SDValue NewOp2 = DAG.getConstant(CNV, CN->getValueType(0)); - if (OffsetIdx == 0) - std::swap(NewOp1, NewOp2); + X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1; + Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1; + X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1; + Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1; - SDValue NewUse = DAG.getNode(OtherUses[i]->getOpcode(), + unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD; + + APInt CNV = Offset0; + if (X0 < 0) CNV = -CNV; + if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1; + else CNV = CNV - Offset1; + + // We can now generate the new expression. + SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0)); + SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0); + + SDValue NewUse = DAG.getNode(Opcode, OtherUses[i]->getDebugLoc(), OtherUses[i]->getValueType(0), NewOp1, NewOp2); DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); @@ -9065,6 +9122,51 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(N->getValueType(0)); + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR + // nodes often generate nop CONCAT_VECTOR nodes. + // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that + // place the incoming vectors at the exact same location. + SDValue SingleSource = SDValue(); + unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements(); + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + + if (Op.getOpcode() == ISD::UNDEF) + continue; + + // Check if this is the identity extract: + if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) + return SDValue(); + + // Find the single incoming vector for the extract_subvector. + if (SingleSource.getNode()) { + if (Op.getOperand(0) != SingleSource) + return SDValue(); + } else { + SingleSource = Op.getOperand(0); + + // Check the source type is the same as the type of the result. + // If not, this concat may extend the vector, so we can not + // optimize it away. + if (SingleSource.getValueType() != N->getValueType(0)) + return SDValue(); + } + + unsigned IdentityIndex = i * PartNumElem; + ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + // The extract index must be constant. + if (!CS) + return SDValue(); + + // Check that we are reading from the identity index. + if (CS->getZExtValue() != IdentityIndex) + return SDValue(); + } + + if (SingleSource.getNode()) + return SingleSource; + return SDValue(); } @@ -9125,6 +9227,44 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { return SDValue(); } +// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat. +static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + + SmallVector<SDValue, 4> Ops; + EVT ConcatVT = N0.getOperand(0).getValueType(); + unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements(); + unsigned NumConcats = NumElts / NumElemsPerConcat; + + // Look at every vector that's inserted. We're looking for exact + // subvector-sized copies from a concatenated vector + for (unsigned I = 0; I != NumConcats; ++I) { + // Make sure we're dealing with a copy. + unsigned Begin = I * NumElemsPerConcat; + if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) + return SDValue(); + + for (unsigned J = 1; J != NumElemsPerConcat; ++J) { + if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) + return SDValue(); + } + + unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; + if (FirstElt < N0.getNumOperands()) + Ops.push_back(N0.getOperand(FirstElt)); + else + Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); + } + + return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, Ops.data(), + Ops.size()); +} + SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -9226,6 +9366,17 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } + if (N0.getOpcode() == ISD::CONCAT_VECTORS && + Level < AfterLegalizeVectorOps && + (N1.getOpcode() == ISD::UNDEF || + (N1.getOpcode() == ISD::CONCAT_VECTORS && + N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) { + SDValue V = partitionShuffleOfConcats(N, DAG); + + if (V.getNode()) + return V; + } + // If this shuffle node is simply a swizzle of another shuffle node, // and it reverses the swizzle of the previous shuffle then we can // optimize shuffle(shuffle(x, undef), undef) -> x. @@ -9262,59 +9413,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) { - if (!TLI.getShouldFoldAtomicFences()) - return SDValue(); - - SDValue atomic = N->getOperand(0); - switch (atomic.getOpcode()) { - case ISD::ATOMIC_CMP_SWAP: - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: - break; - default: - return SDValue(); - } - - SDValue fence = atomic.getOperand(0); - if (fence.getOpcode() != ISD::MEMBARRIER) - return SDValue(); - - switch (atomic.getOpcode()) { - case ISD::ATOMIC_CMP_SWAP: - return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), - fence.getOperand(0), - atomic.getOperand(1), atomic.getOperand(2), - atomic.getOperand(3)), atomic.getResNo()); - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: - return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), - fence.getOperand(0), - atomic.getOperand(1), atomic.getOperand(2)), - atomic.getResNo()); - default: - return SDValue(); - } -} - /// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform /// an AND to a vector_shuffle with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 9ac738e..288499a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1505,3 +1505,61 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { return true; } + +bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { + assert(LI->hasOneUse() && + "tryToFoldLoad expected a LoadInst with a single use"); + // We know that the load has a single use, but don't know what it is. If it + // isn't one of the folded instructions, then we can't succeed here. Handle + // this by scanning the single-use users of the load until we get to FoldInst. + unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. + + const Instruction *TheUser = LI->use_back(); + while (TheUser != FoldInst && // Scan up until we find FoldInst. + // Stay in the right block. + TheUser->getParent() == FoldInst->getParent() && + --MaxUsers) { // Don't scan too far. + // If there are multiple or no uses of this instruction, then bail out. + if (!TheUser->hasOneUse()) + return false; + + TheUser = TheUser->use_back(); + } + + // If we didn't find the fold instruction, then we failed to collapse the + // sequence. + if (TheUser != FoldInst) + return false; + + // Don't try to fold volatile loads. Target has to deal with alignment + // constraints. + if (LI->isVolatile()) + return false; + + // Figure out which vreg this is going into. If there is no assigned vreg yet + // then there actually was no reference to it. Perhaps the load is referenced + // by a dead instruction. + unsigned LoadReg = getRegForValue(LI); + if (LoadReg == 0) + return false; + + // We can't fold if this vreg has no uses or more than one use. Multiple uses + // may mean that the instruction got lowered to multiple MIs, or the use of + // the loaded value ended up being multiple operands of the result. + if (!MRI.hasOneUse(LoadReg)) + return false; + + MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg); + MachineInstr *User = &*RI; + + // Set the insertion point properly. Folding the load can cause generation of + // other random instructions (like sign extends) for addressing modes; make + // sure they get inserted in a logical place before the new instruction. + FuncInfo.InsertPt = User; + FuncInfo.MBB = User->getParent(); + + // Ask the target to try folding the load. + return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI); +} + + diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 51cc254..2a1d8c2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2759,8 +2759,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(DAG.getConstant(0, MVT::i32)); Results.push_back(Node->getOperand(0)); break; - case ISD::ATOMIC_FENCE: - case ISD::MEMBARRIER: { + case ISD::ATOMIC_FENCE: { // If the target didn't lower this, lower it to '__sync_synchronize()' call // FIXME: handle "fence singlethread" more efficiently. TargetLowering::ArgListTy Args; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index d19c13b..cd2f060 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -777,7 +777,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { Res = PromoteIntOp_CONVERT_RNDSAT(N); break; case ISD::INSERT_VECTOR_ELT: Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; - case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break; case ISD::SCALAR_TO_VECTOR: Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break; case ISD::VSELECT: @@ -961,17 +960,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, N->getOperand(1), Idx), 0); } -SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) { - SDValue NewOps[6]; - DebugLoc dl = N->getDebugLoc(); - NewOps[0] = N->getOperand(0); - for (unsigned i = 1; i < array_lengthof(NewOps); ++i) { - SDValue Flag = GetPromotedInteger(N->getOperand(i)); - NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1); - } - return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0); -} - SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote // the operand in place. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 54ea926..1c4274a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -270,7 +270,6 @@ private: SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N); SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); - SDValue PromoteIntOp_MEMBARRIER(SDNode *N); SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); @@ -582,6 +581,7 @@ private: SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); + SDValue SplitVecOp_TRUNCATE(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 5ec8535..04c6bfd 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1046,6 +1046,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; + case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break; case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); @@ -1062,7 +1063,6 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::FTRUNC: - case ISD::TRUNCATE: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: @@ -1272,8 +1272,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { DebugLoc DL = N->getDebugLoc(); - // The input operands all must have the same type, and we know the result the - // result type is valid. Convert this to a buildvector which extracts all the + // The input operands all must have the same type, and we know the result + // type is valid. Convert this to a buildvector which extracts all the // input elements. // TODO: If the input elements are power-two vectors, we could convert this to // a new CONCAT_VECTORS node with elements that are half-wide. @@ -1293,6 +1293,66 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { &Elts[0], Elts.size()); } +SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { + // The result type is legal, but the input type is illegal. If splitting + // ends up with the result type of each half still being legal, just + // do that. If, however, that would result in an illegal result type, + // we can try to get more clever with power-two vectors. Specifically, + // split the input type, but also widen the result element size, then + // concatenate the halves and truncate again. For example, consider a target + // where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit + // vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do: + // %inlo = v4i32 extract_subvector %in, 0 + // %inhi = v4i32 extract_subvector %in, 4 + // %lo16 = v4i16 trunc v4i32 %inlo + // %hi16 = v4i16 trunc v4i32 %inhi + // %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16 + // %res = v8i8 trunc v8i16 %in16 + // + // Without this transform, the original truncate would end up being + // scalarized, which is pretty much always a last resort. + SDValue InVec = N->getOperand(0); + EVT InVT = InVec->getValueType(0); + EVT OutVT = N->getValueType(0); + unsigned NumElements = OutVT.getVectorNumElements(); + // Widening should have already made sure this is a power-two vector + // if we're trying to split it at all. assert() that's true, just in case. + assert(!(NumElements & 1) && "Splitting vector, but not in half!"); + + unsigned InElementSize = InVT.getVectorElementType().getSizeInBits(); + unsigned OutElementSize = OutVT.getVectorElementType().getSizeInBits(); + + // If the input elements are only 1/2 the width of the result elements, + // just use the normal splitting. Our trick only work if there's room + // to split more than once. + if (InElementSize <= OutElementSize * 2) + return SplitVecOp_UnaryOp(N); + DebugLoc DL = N->getDebugLoc(); + + // Extract the halves of the input via extract_subvector. + EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), + InVT.getVectorElementType(), NumElements/2); + SDValue InLoVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, + DAG.getIntPtrConstant(0)); + SDValue InHiVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, + DAG.getIntPtrConstant(NumElements/2)); + // Truncate them to 1/2 the element size. + EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, + NumElements/2); + SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec); + SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec); + // Concatenate them to get the full intermediate truncation result. + EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements); + SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo, + HalfHi); + // Now finish up by truncating all the way down to the original result + // type. This should normally be something that ends up being legal directly, + // but in theory if a target has very wide vectors and an annoyingly + // restricted set of legal types, this split can chain to build things up. + return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); +} + SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 6424431..15235c8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2785,7 +2785,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, } // Handle the scalar case first. - if (Outputs.size() == 1) + if (Scalar1 && Scalar2) return Outputs.back(); // Otherwise build a big vector out of the scalar elements we generated. @@ -5252,14 +5252,14 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) { SDVTList VTs = getVTList(VT); - return getMachineNode(Opcode, dl, VTs, 0, 0); + return getMachineNode(Opcode, dl, VTs, None); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5267,7 +5267,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5275,20 +5275,20 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2, Op3 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT); - return getMachineNode(Opcode, dl, VTs, Ops, NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); - return getMachineNode(Opcode, dl, VTs, 0, 0); + return getMachineNode(Opcode, dl, VTs, None); } MachineSDNode * @@ -5296,7 +5296,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5304,7 +5304,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5313,15 +5313,15 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2, Op3 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2); - return getMachineNode(Opcode, dl, VTs, Ops, NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5330,7 +5330,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5339,39 +5339,41 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2, Op3 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, EVT VT3, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3); - return getMachineNode(Opcode, dl, VTs, Ops, NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, EVT VT3, EVT VT4, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); - return getMachineNode(Opcode, dl, VTs, Ops, NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, ArrayRef<EVT> ResultTys, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size()); - return getMachineNode(Opcode, dl, VTs, Ops, NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> OpsArray) { bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; MachineSDNode *N; void *IP = 0; + const SDValue *Ops = OpsArray.data(); + unsigned NumOps = OpsArray.size(); if (DoCSE) { FoldingSetNodeID ID; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ce40cd6..67db211 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -314,7 +314,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, } else { Ctx.emitError(ErrMsg); } - report_fatal_error("Cannot handle scalar-to-vector conversion!"); + return DAG.getUNDEF(ValueVT); } if (ValueVT.getVectorNumElements() == 1 && @@ -5034,6 +5034,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return 0; } + case Intrinsic::annotation: + case Intrinsic::ptr_annotation: + // Drop the intrinsic, but forward the value + setValue(&I, getValue(I.getOperand(0))); + return 0; case Intrinsic::var_annotation: // Discard annotate attributes return 0; @@ -5232,6 +5237,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Entry.isSRet = true; Entry.isNest = false; Entry.isByVal = false; + Entry.isReturned = false; Entry.Alignment = Align; Args.push_back(Entry); RetTy = Type::getVoidTy(FTy->getContext()); @@ -5249,13 +5255,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Entry.Node = ArgNode; Entry.Ty = V->getType(); unsigned attrInd = i - CS.arg_begin() + 1; - Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); - Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); - Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); - Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); - Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); - Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); - Entry.Alignment = CS.getParamAlignment(attrInd); + Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); + Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); + Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); + Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); + Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); + Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); + Entry.isReturned = CS.paramHasAttr(attrInd, Attribute::Returned); + Entry.Alignment = CS.getParamAlignment(attrInd); Args.push_back(Entry); } @@ -6169,10 +6176,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MatchedRegs.RegVTs.push_back(RegVT); MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); - i != e; ++i) - MatchedRegs.Regs.push_back - (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT))); - + i != e; ++i) { + if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) + MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); + else { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), "inline asm error: This value" + " type register class is not natively supported!"); + report_fatal_error("inline asm error: This value type register " + "class is not natively supported!"); + } + } // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag, CS.getInstruction()); @@ -6389,6 +6403,28 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { /// migrated to using LowerCall, this hook should be integrated into SDISel. std::pair<SDValue, SDValue> TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { + // Handle the incoming return values from the call. + CLI.Ins.clear(); + SmallVector<EVT, 4> RetTys; + ComputeValueVTs(*this, CLI.RetTy, RetTys); + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) + MyFlags.Flags.setSExt(); + if (CLI.RetZExt) + MyFlags.Flags.setZExt(); + if (CLI.IsInReg) + MyFlags.Flags.setInReg(); + CLI.Ins.push_back(MyFlags); + } + } + // Handle all of the outgoing arguments. CLI.Outs.clear(); CLI.OutVals.clear(); @@ -6442,6 +6478,26 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { else if (Args[i].isZExt) ExtendKind = ISD::ZERO_EXTEND; + // Conservatively only handle 'returned' on non-vectors for now + if (Args[i].isReturned && !Op.getValueType().isVector()) { + assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues && + "unexpected use of 'returned'"); + // Before passing 'returned' to the target lowering code, ensure that + // either the register MVT and the actual EVT are the same size or that + // the return value and argument are extended in the same way; in these + // cases it's safe to pass the argument register value unchanged as the + // return register value (although it's at the target's option whether + // to do so) + // TODO: allow code generation to take advantage of partially preserved + // registers rather than clobbering the entire register when the + // parameter extension method is not compatible with the return + // extension method + if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) || + (ExtendKind != ISD::ANY_EXTEND && + CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt)) + Flags.setReturned(); + } + getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind); @@ -6461,28 +6517,6 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } } - // Handle the incoming return values from the call. - CLI.Ins.clear(); - SmallVector<EVT, 4> RetTys; - ComputeValueVTs(*this, CLI.RetTy, RetTys); - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); - for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags; - MyFlags.VT = RegisterVT; - MyFlags.Used = CLI.IsReturnValueUsed; - if (CLI.RetSExt) - MyFlags.Flags.setSExt(); - if (CLI.RetZExt) - MyFlags.Flags.setZExt(); - if (CLI.IsInReg) - MyFlags.Flags.setInReg(); - CLI.Ins.push_back(MyFlags); - } - } - SmallVector<SDValue, 4> InVals; CLI.Chain = LowerCall(CLI, InVals); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 3b5823b..47b0391 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -54,7 +54,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::DELETED_NODE: return "<<Deleted Node!>>"; #endif case ISD::PREFETCH: return "Prefetch"; - case ISD::MEMBARRIER: return "MemBarrier"; case ISD::ATOMIC_FENCE: return "AtomicFence"; case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; case ISD::ATOMIC_SWAP: return "AtomicSwap"; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index eeea9e4..e21f26e 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -63,12 +63,16 @@ STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected"); STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG"); STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); +STATISTIC(NumEntryBlocks, "Number of entry blocks encountered"); +STATISTIC(NumFastIselFailLowerArguments, + "Number of entry blocks where fast isel failed to lower arguments"); #ifndef NDEBUG static cl::opt<bool> EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden, cl::desc("Enable extra verbose messages in the \"fast\" " "instruction selector")); + // Terminators STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret"); STATISTIC(NumFastIselFailBr,"Fast isel fails on Br"); @@ -742,7 +746,7 @@ public: } // end anonymous namespace void SelectionDAGISel::DoInstructionSelection() { - DEBUG(errs() << "===== Instruction selection begins: BB#" + DEBUG(dbgs() << "===== Instruction selection begins: BB#" << FuncInfo->MBB->getNumber() << " '" << FuncInfo->MBB->getName() << "'\n"); @@ -801,7 +805,7 @@ void SelectionDAGISel::DoInstructionSelection() { CurDAG->setRoot(Dummy.getValue()); } - DEBUG(errs() << "===== Instruction selection ends:\n"); + DEBUG(dbgs() << "===== Instruction selection ends:\n"); PostprocessISelDAG(); } @@ -831,84 +835,6 @@ void SelectionDAGISel::PrepareEHLandingPad() { if (Reg) MBB->addLiveIn(Reg); } -/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified -/// load into the specified FoldInst. Note that we could have a sequence where -/// multiple LLVM IR instructions are folded into the same machineinstr. For -/// example we could have: -/// A: x = load i32 *P -/// B: y = icmp A, 42 -/// C: br y, ... -/// -/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and -/// any other folded instructions) because it is between A and C. -/// -/// If we succeed in folding the load into the operation, return true. -/// -bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, - const Instruction *FoldInst, - FastISel *FastIS) { - // We know that the load has a single use, but don't know what it is. If it - // isn't one of the folded instructions, then we can't succeed here. Handle - // this by scanning the single-use users of the load until we get to FoldInst. - unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. - - const Instruction *TheUser = LI->use_back(); - while (TheUser != FoldInst && // Scan up until we find FoldInst. - // Stay in the right block. - TheUser->getParent() == FoldInst->getParent() && - --MaxUsers) { // Don't scan too far. - // If there are multiple or no uses of this instruction, then bail out. - if (!TheUser->hasOneUse()) - return false; - - TheUser = TheUser->use_back(); - } - - // If we didn't find the fold instruction, then we failed to collapse the - // sequence. - if (TheUser != FoldInst) - return false; - - // Don't try to fold volatile loads. Target has to deal with alignment - // constraints. - if (LI->isVolatile()) return false; - - // Figure out which vreg this is going into. If there is no assigned vreg yet - // then there actually was no reference to it. Perhaps the load is referenced - // by a dead instruction. - unsigned LoadReg = FastIS->getRegForValue(LI); - if (LoadReg == 0) - return false; - - // Check to see what the uses of this vreg are. If it has no uses, or more - // than one use (at the machine instr level) then we can't fold it. - MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg); - if (RI == RegInfo->reg_end()) - return false; - - // See if there is exactly one use of the vreg. If there are multiple uses, - // then the instruction got lowered to multiple machine instructions or the - // use of the loaded value ended up being multiple operands of the result, in - // either case, we can't fold this. - MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI; - if (PostRI != RegInfo->reg_end()) - return false; - - assert(RI.getOperand().isUse() && - "The only use of the vreg must be a use, we haven't emitted the def!"); - - MachineInstr *User = &*RI; - - // Set the insertion point properly. Folding the load can cause generation of - // other random instructions (like sign extends) for addressing modes, make - // sure they get inserted in a logical place before the new instruction. - FuncInfo->InsertPt = User; - FuncInfo->MBB = User->getParent(); - - // Ask the target to try folding the load. - return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI); -} - /// isFoldedOrDeadInstruction - Return true if the specified instruction is /// side-effect free and is either dead or folded into a generated instruction. /// Return false if it needs to be emitted. @@ -1054,9 +980,12 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Emit code for any incoming arguments. This must happen before // beginning FastISel on the entry block. if (LLVMBB == &Fn.getEntryBlock()) { + ++NumEntryBlocks; + // Lower any arguments needed in this block if this is the entry block. if (!FastIS->LowerArguments()) { // Fast isel failed to lower these arguments + ++NumFastIselFailLowerArguments; if (EnableFastISelAbortArgs) llvm_unreachable("FastISel didn't lower all arguments"); @@ -1106,7 +1035,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && BeforeInst->hasOneUse() && - TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) { + FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) { // If we succeeded, don't re-select the load. BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); --NumFastIselRemaining; @@ -1178,8 +1107,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->recomputeInsertPt(); } else { // Lower any arguments needed in this block if this is the entry block. - if (LLVMBB == &Fn.getEntryBlock()) + if (LLVMBB == &Fn.getEntryBlock()) { + ++NumEntryBlocks; LowerArguments(Fn); + } } if (Begin != BI) @@ -1771,7 +1702,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, if (!NowDeadNodes.empty()) CurDAG->RemoveDeadNodes(NowDeadNodes); - DEBUG(errs() << "ISEL: Match complete!\n"); + DEBUG(dbgs() << "ISEL: Match complete!\n"); } enum ChainResult { @@ -2276,9 +2207,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, SmallVector<SDNode*, 3> ChainNodesMatched; SmallVector<SDNode*, 3> GlueResultNodesMatched; - DEBUG(errs() << "ISEL: Starting pattern match on root node: "; + DEBUG(dbgs() << "ISEL: Starting pattern match on root node: "; NodeToMatch->dump(CurDAG); - errs() << '\n'); + dbgs() << '\n'); // Determine where to start the interpreter. Normally we start at opcode #0, // but if the state machine starts with an OPC_SwitchOpcode, then we @@ -2290,7 +2221,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Already computed the OpcodeOffset table, just index into it. if (N.getOpcode() < OpcodeOffset.size()) MatcherIndex = OpcodeOffset[N.getOpcode()]; - DEBUG(errs() << " Initial Opcode index to " << MatcherIndex << "\n"); + DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n"); } else if (MatcherTable[0] == OPC_SwitchOpcode) { // Otherwise, the table isn't computed, but the state machine does start @@ -2357,7 +2288,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (!Result) break; - DEBUG(errs() << " Skipped scope entry (due to false predicate) at " + DEBUG(dbgs() << " Skipped scope entry (due to false predicate) at " << "index " << MatcherIndexOfPredicate << ", continuing at " << FailIndex << "\n"); ++NumDAGIselRetries; @@ -2487,7 +2418,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (CaseSize == 0) break; // Otherwise, execute the case we found. - DEBUG(errs() << " OpcodeSwitch from " << SwitchStart + DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart << " to " << MatcherIndex << "\n"); continue; } @@ -2519,7 +2450,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (CaseSize == 0) break; // Otherwise, execute the case we found. - DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() + DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() << "] from " << SwitchStart << " to " << MatcherIndex<<'\n'); continue; } @@ -2787,7 +2718,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // If this is a normal EmitNode command, just create the new node and // add the results to the RecordedNodes list. Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(), - VTList, Ops.data(), Ops.size()); + VTList, Ops); // Add all the non-glue/non-chain results to the RecordedNodes list. for (unsigned i = 0, e = VTs.size(); i != e; ++i) { @@ -2863,9 +2794,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, ->setMemRefs(MemRefs, MemRefs + NumMemRefs); } - DEBUG(errs() << " " + DEBUG(dbgs() << " " << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created") - << " node: "; Res->dump(CurDAG); errs() << "\n"); + << " node: "; Res->dump(CurDAG); dbgs() << "\n"); // If this was a MorphNodeTo then we're completely done! if (Opcode == OPC_MorphNodeTo) { @@ -2940,7 +2871,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // If the code reached this point, then the match failed. See if there is // another child to try in the current 'Scope', otherwise pop it until we // find a case to check. - DEBUG(errs() << " Match failed at index " << CurrentOpcodeIndex << "\n"); + DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex << "\n"); ++NumDAGIselRetries; while (1) { if (MatchScopes.empty()) { @@ -2960,7 +2891,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, MatchedMemRefs.resize(LastScope.NumMatchedMemRefs); MatcherIndex = LastScope.FailIndex; - DEBUG(errs() << " Continuing at " << MatcherIndex << "\n"); + DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n"); InputChain = LastScope.InputChain; InputGlue = LastScope.InputGlue; |