Diffstat (limited to 'contrib/llvm/lib/Target')
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp    |  16
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp           |  34
-rw-r--r--  contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp            | 145
-rw-r--r--  contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp     |  66
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td            |   3
-rw-r--r--  contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp |   2
-rw-r--r--  contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp     |  26
-rw-r--r--  contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp            |   1
-rw-r--r--  contrib/llvm/lib/Target/X86/X86.td                         |  14
-rw-r--r--  contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp              |   4
-rw-r--r--  contrib/llvm/lib/Target/X86/X86ISelLowering.cpp            |   2
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td       |   2
-rw-r--r--  contrib/llvm/lib/Target/X86/X86Subtarget.cpp               |   2
-rw-r--r--  contrib/llvm/lib/Target/X86/X86Subtarget.h                 |   8
14 files changed, 146 insertions, 179 deletions
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 19f51ce..399b5ee 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6287,6 +6287,8 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
                                     AArch64CC::CondCode CC, bool NoNans, EVT VT,
                                     SDLoc dl, SelectionDAG &DAG) {
   EVT SrcVT = LHS.getValueType();
+  assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
+         "function only supposed to emit natural comparisons");
 
   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
   APInt CnstBits(VT.getSizeInBits(), 0);
@@ -6381,13 +6383,15 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
   SDValue LHS = Op.getOperand(0);
   SDValue RHS = Op.getOperand(1);
+  EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
   SDLoc dl(Op);
 
   if (LHS.getValueType().getVectorElementType().isInteger()) {
     assert(LHS.getValueType() == RHS.getValueType());
     AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
-    return EmitVectorComparison(LHS, RHS, AArch64CC, false, Op.getValueType(),
-                                dl, DAG);
+    SDValue Cmp =
+        EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
+    return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
   }
 
   assert(LHS.getValueType().getVectorElementType() == MVT::f32 ||
@@ -6401,19 +6405,21 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
   bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
 
   SDValue Cmp =
-      EmitVectorComparison(LHS, RHS, CC1, NoNaNs, Op.getValueType(), dl, DAG);
+      EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
   if (!Cmp.getNode())
     return SDValue();
 
   if (CC2 != AArch64CC::AL) {
     SDValue Cmp2 =
-        EmitVectorComparison(LHS, RHS, CC2, NoNaNs, Op.getValueType(), dl, DAG);
+        EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
     if (!Cmp2.getNode())
       return SDValue();
 
-    Cmp = DAG.getNode(ISD::OR, dl, Cmp.getValueType(), Cmp, Cmp2);
+    Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
   }
 
+  Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
+
   if (ShouldInvert)
     return Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
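The AArch64 change above makes EmitVectorComparison always produce its "natural" result type: an integer vector with the same total width as the compared operands. A minimal sketch of the resulting lowering shape, assuming only the SelectionDAG calls visible in the diff (the helper name is illustrative, not LLVM API):

    // Sketch: compare in the operand-sized integer type, then widen or
    // narrow the all-ones/all-zeros lanes to the type SETCC was given.
    static SDValue lowerVectorSetccSketch(SelectionDAG &DAG, SDLoc dl,
                                          SDValue LHS, SDValue RHS,
                                          ISD::CondCode CC, EVT ResultVT) {
      // e.g. v2f64 operands compare naturally in v2i64.
      EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
      SDValue Cmp = DAG.getSetCC(dl, CmpVT, LHS, RHS, CC);
      // Sign-extension preserves the all-ones lane mask; so does truncation.
      return DAG.getSExtOrTrunc(Cmp, dl, ResultVT);
    }

This is why both the integer and floating-point paths now funnel through DAG.getSExtOrTrunc before returning.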
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index e0397cc..c1244225 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2400,7 +2400,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
   else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
     // Conservatively refuse to convert an instruction which isn't in the same
     // BB as the comparison.
-    // For CMPri, we need to check Sub, thus we can't return here.
+    // For CMPri w/ CmpValue != 0, a Sub may still be a candidate.
+    // Thus we cannot return here.
     if (CmpInstr->getOpcode() == ARM::CMPri ||
         CmpInstr->getOpcode() == ARM::t2CMPri)
       MI = nullptr;
@@ -2479,8 +2480,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
       case ARM::t2EORrr:
       case ARM::t2EORri: {
         // Scan forward for the use of CPSR
-        // When checking against MI: if it's a conditional code requires
-        // checking of V bit, then this is not safe to do.
+        // When checking against MI: if it's a conditional code that requires
+        // checking of the V bit or C bit, then this is not safe to do.
         // It is safe to remove CmpInstr if CPSR is redefined or killed.
         // If we are done with the basic block, we need to check whether CPSR is
         // live-out.
@@ -2547,19 +2548,30 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
             OperandsToUpdate.push_back(
                 std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
           }
-        } else
+        } else {
+          // No Sub, so this is x = <op> y, z; cmp x, 0.
           switch (CC) {
-          default:
+          case ARMCC::EQ: // Z
+          case ARMCC::NE: // Z
+          case ARMCC::MI: // N
+          case ARMCC::PL: // N
+          case ARMCC::AL: // none
             // CPSR can be used multiple times, we should continue.
             break;
-          case ARMCC::VS:
-          case ARMCC::VC:
-          case ARMCC::GE:
-          case ARMCC::LT:
-          case ARMCC::GT:
-          case ARMCC::LE:
+          case ARMCC::HS: // C
+          case ARMCC::LO: // C
+          case ARMCC::VS: // V
+          case ARMCC::VC: // V
+          case ARMCC::HI: // C Z
+          case ARMCC::LS: // C Z
+          case ARMCC::GE: // N V
+          case ARMCC::LT: // N V
+          case ARMCC::GT: // Z N V
+          case ARMCC::LE: // Z N V
+            // The instruction uses the V bit or C bit which is not safe.
             return false;
           }
+        }
       }
     }
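The expanded switch is easier to audit because each condition code is annotated with the CPSR flags it reads; folding the compare into a flag-setting arithmetic instruction is only safe for conditions that read N and/or Z. That mapping, restated as a hedged sketch (an illustrative helper, not LLVM API):

    enum FlagBits { N = 1, Z = 2, C = 4, V = 8 };

    // Which CPSR flags each ARM condition code examines.
    static unsigned flagsReadBy(ARMCC::CondCodes CC) {
      switch (CC) {
      case ARMCC::EQ: case ARMCC::NE: return Z;
      case ARMCC::MI: case ARMCC::PL: return N;
      case ARMCC::HS: case ARMCC::LO: return C;
      case ARMCC::VS: case ARMCC::VC: return V;
      case ARMCC::HI: case ARMCC::LS: return C | Z;
      case ARMCC::GE: case ARMCC::LT: return N | V;
      case ARMCC::GT: case ARMCC::LE: return N | V | Z;
      default:                        return 0; // ARMCC::AL reads nothing
      }
    }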
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index f92f257..a1de5ef 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -565,7 +565,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
     setTargetDAGCombine(ISD::FP_TO_SINT);
     setTargetDAGCombine(ISD::FP_TO_UINT);
     setTargetDAGCombine(ISD::FDIV);
-    setTargetDAGCombine(ISD::LOAD);
 
     // It is legal to extload from v4i8 to v4i16 or v4i32.
     MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
@@ -4488,6 +4487,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
   SDValue Op0 = Op.getOperand(0);
   SDValue Op1 = Op.getOperand(1);
   SDValue CC = Op.getOperand(2);
+  EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
   EVT VT = Op.getValueType();
   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
   SDLoc dl(Op);
@@ -4517,8 +4517,8 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
       TmpOp0 = Op0;
       TmpOp1 = Op1;
       Opc = ISD::OR;
-      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
-      Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
+      Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
+      Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
       break;
     case ISD::SETUO: Invert = true; // Fallthrough
     case ISD::SETO:
@@ -4526,8 +4526,8 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
       TmpOp0 = Op0;
       TmpOp1 = Op1;
       Opc = ISD::OR;
-      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
-      Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
+      Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
+      Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
       break;
     }
   } else {
@@ -4561,8 +4561,8 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
 
     if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
       Opc = ARMISD::VTST;
-      Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
-      Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
+      Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
+      Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
       Invert = !Invert;
     }
   }
@@ -4588,22 +4588,24 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
   if (SingleOp.getNode()) {
     switch (Opc) {
     case ARMISD::VCEQ:
-      Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
+      Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
     case ARMISD::VCGE:
-      Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
+      Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
     case ARMISD::VCLEZ:
-      Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
+      Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
     case ARMISD::VCGT:
-      Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
+      Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
     case ARMISD::VCLTZ:
-      Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
+      Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
     default:
-      Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+      Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
     }
   } else {
-    Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+    Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
   }
 
+  Result = DAG.getSExtOrTrunc(Result, dl, VT);
+
   if (Invert)
     Result = DAG.getNOT(dl, Result, VT);
 
@@ -8877,18 +8879,17 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
                               DAG.getUNDEF(VT), NewMask.data());
 }
 
-/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
-/// NEON load/store intrinsics, and generic vector load/stores, to merge
-/// base address updates.
-/// For generic load/stores, the memory type is assumed to be a vector.
-/// The caller is assumed to have checked legality.
+/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
+/// NEON load/store intrinsics to merge base address updates.
 static SDValue CombineBaseUpdate(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI) {
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+    return SDValue();
+
   SelectionDAG &DAG = DCI.DAG;
   bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
                       N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
-  bool isStore = N->getOpcode() == ISD::STORE;
-  unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
+  unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
   SDValue Addr = N->getOperand(AddrOpIdx);
 
   // Search for a use of the address operand that is an increment.
@@ -8949,10 +8950,6 @@ static SDValue CombineBaseUpdate(SDNode *N,
       case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
       case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
       case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
-      case ISD::LOAD:       NewOpc = ARMISD::VLD1_UPD;
-        NumVecs = 1; isLaneOp = false; break;
-      case ISD::STORE:      NewOpc = ARMISD::VST1_UPD;
-        NumVecs = 1; isLoad = false; isLaneOp = false; break;
       }
     }
 
@@ -8960,11 +8957,8 @@ static SDValue CombineBaseUpdate(SDNode *N,
     EVT VecTy;
     if (isLoad)
       VecTy = N->getValueType(0);
-    else if (isIntrinsic)
-      VecTy = N->getOperand(AddrOpIdx+1).getValueType();
     else
-      VecTy = N->getOperand(1).getValueType();
-
+      VecTy = N->getOperand(AddrOpIdx+1).getValueType();
     unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
     if (isLaneOp)
       NumBytes /= VecTy.getVectorNumElements();
@@ -8981,70 +8975,25 @@ static SDValue CombineBaseUpdate(SDNode *N,
       continue;
     }
 
-    EVT AlignedVecTy = VecTy;
-
-    // If this is a less-than-standard-aligned load/store, change the type to
-    // match the standard alignment.
-    // The alignment is overlooked when selecting _UPD variants; and it's
-    // easier to introduce bitcasts here than fix that.
-    // There are 3 ways to get to this base-update combine:
-    // - intrinsics: they are assumed to be properly aligned (to the standard
-    //   alignment of the memory type), so we don't need to do anything.
-    // - ARMISD::VLDx nodes: they are only generated from the aforementioned
-    //   intrinsics, so, likewise, there's nothing to do.
-    // - generic load/store instructions: the alignment is specified as an
-    //   explicit operand, rather than implicitly as the standard alignment
-    //   of the memory type (like the intrisics).  We need to change the
-    //   memory type to match the explicit alignment.  That way, we don't
-    //   generate non-standard-aligned ARMISD::VLDx nodes.
-    if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N)) {
-      unsigned Alignment = LSN->getAlignment();
-      if (Alignment == 0)
-        Alignment = 1;
-      if (Alignment < VecTy.getScalarSizeInBits() / 8) {
-        MVT EltTy = MVT::getIntegerVT(Alignment * 8);
-        assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
-        assert(!isLaneOp && "Unexpected generic load/store lane.");
-        unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
-        AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
-      }
-    }
-
     // Create the new updating load/store node.
-    // First, create an SDVTList for the new updating node's results.
     EVT Tys[6];
     unsigned NumResultVecs = (isLoad ? NumVecs : 0);
     unsigned n;
     for (n = 0; n < NumResultVecs; ++n)
-      Tys[n] = AlignedVecTy;
+      Tys[n] = VecTy;
     Tys[n++] = MVT::i32;
     Tys[n] = MVT::Other;
     SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
-
-    // Then, gather the new node's operands.
     SmallVector<SDValue, 8> Ops;
     Ops.push_back(N->getOperand(0)); // incoming chain
     Ops.push_back(N->getOperand(AddrOpIdx));
     Ops.push_back(Inc);
-    if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
-      // Try to match the intrinsic's signature
-      Ops.push_back(StN->getValue());
-      Ops.push_back(DAG.getConstant(StN->getAlignment(), MVT::i32));
-    } else {
-      for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i)
-        Ops.push_back(N->getOperand(i));
-    }
-
-    // If this is a non-standard-aligned store, the penultimate operand is the
-    // stored value.  Bitcast it to the aligned type.
-    if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
-      SDValue &StVal = Ops[Ops.size()-2];
-      StVal = DAG.getNode(ISD::BITCAST, SDLoc(N), AlignedVecTy, StVal);
+    for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
+      Ops.push_back(N->getOperand(i));
     }
-
-    MemSDNode *MemInt = cast<MemSDNode>(N);
+    MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
     SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
-                                           Ops, AlignedVecTy,
+                                           Ops, MemInt->getMemoryVT(),
                                            MemInt->getMemOperand());
 
     // Update the uses.
@@ -9052,14 +9001,6 @@ static SDValue CombineBaseUpdate(SDNode *N,
     for (unsigned i = 0; i < NumResultVecs; ++i) {
       NewResults.push_back(SDValue(UpdN.getNode(), i));
     }
-
-    // If this is an non-standard-aligned load, the first result is the loaded
-    // value.  Bitcast it to the expected result type.
-    if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
-      SDValue &LdVal = NewResults[0];
-      LdVal = DAG.getNode(ISD::BITCAST, SDLoc(N), VecTy, LdVal);
-    }
-
     NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
     DCI.CombineTo(N, NewResults);
     DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
@@ -9069,14 +9010,6 @@ static SDValue CombineBaseUpdate(SDNode *N,
   return SDValue();
 }
 
-static SDValue PerformVLDCombine(SDNode *N,
-                                 TargetLowering::DAGCombinerInfo &DCI) {
-  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
-    return SDValue();
-
-  return CombineBaseUpdate(N, DCI);
-}
-
 /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
 /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
 /// are also VDUPLANEs.  If so, combine them to a vldN-dup operation and
@@ -9190,18 +9123,6 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
   return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
 }
 
-static SDValue PerformLOADCombine(SDNode *N,
-                                  TargetLowering::DAGCombinerInfo &DCI) {
-  EVT VT = N->getValueType(0);
-
-  // If this is a legal vector load, try to combine it into a VLD1_UPD.
-  if (ISD::isNormalLoad(N) && VT.isVector() &&
-      DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
-    return CombineBaseUpdate(N, DCI);
-
-  return SDValue();
-}
-
 /// PerformSTORECombine - Target-specific dag combine xforms for
 /// ISD::STORE.
 static SDValue PerformSTORECombine(SDNode *N,
@@ -9340,11 +9261,6 @@ static SDValue PerformSTORECombine(SDNode *N,
                         St->getAAInfo());
   }
 
-  // If this is a legal vector store, try to combine it into a VST1_UPD.
-  if (ISD::isNormalStore(N) && VT.isVector() &&
-      DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
-    return CombineBaseUpdate(N, DCI);
-
   return SDValue();
 }
 
@@ -9938,11 +9854,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
   case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
   case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
-  case ISD::LOAD:       return PerformLOADCombine(N, DCI);
   case ARMISD::VLD2DUP:
   case ARMISD::VLD3DUP:
   case ARMISD::VLD4DUP:
-    return PerformVLDCombine(N, DCI);
+    return CombineBaseUpdate(N, DCI);
   case ARMISD::BUILD_VECTOR:
     return PerformARMBUILD_VECTORCombine(N, DCI);
   case ISD::INTRINSIC_VOID:
@@ -9962,7 +9877,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
     case Intrinsic::arm_neon_vst2lane:
     case Intrinsic::arm_neon_vst3lane:
     case Intrinsic::arm_neon_vst4lane:
-      return PerformVLDCombine(N, DCI);
+      return CombineBaseUpdate(N, DCI);
     default: break;
     }
     break;
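For context on what CombineBaseUpdate matches: it looks for an ADD of the address by the access size, so the pointer update can fold into a post-incrementing NEON access (vld1.32 {d0,d1}, [r0]! instead of a vld1 followed by a separate add). A simplified sketch of that search, assuming the real function's extra handling of operand order and register-sized increments (the helper name is illustrative):

    // Simplified sketch of the increment search in CombineBaseUpdate.
    static SDNode *findBaseUpdateSketch(SDValue Addr, unsigned NumBytes) {
      for (SDNode *User : Addr.getNode()->uses()) {
        if (User->getOpcode() != ISD::ADD)
          continue;
        // Writeback only folds if the increment equals the access size.
        if (auto *Inc = dyn_cast<ConstantSDNode>(User->getOperand(1)))
          if (Inc->getZExtValue() == NumBytes)
            return User; // becomes the _UPD node's address update
      }
      return nullptr;
    }

The diff restricts this combine to the NEON intrinsics and VLDDUP nodes again; the generic ISD::LOAD/ISD::STORE paths are backed out wholesale.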
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 96f3b4e..56de9d2 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -9195,34 +9195,48 @@ static const struct {
   const uint64_t Enabled;
   const uint64_t Disabled;
 } FPUs[] = {
-    {ARM::VFP, ARM::FeatureVFP2, ARM::FeatureNEON},
-    {ARM::VFPV2, ARM::FeatureVFP2, ARM::FeatureNEON},
-    {ARM::VFPV3, ARM::FeatureVFP2 | ARM::FeatureVFP3, ARM::FeatureNEON},
-    {ARM::VFPV3_D16, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureD16,
-     ARM::FeatureNEON},
-    {ARM::VFPV4, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4,
-     ARM::FeatureNEON},
-    {ARM::VFPV4_D16,
-     ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | ARM::FeatureD16,
-     ARM::FeatureNEON},
-    {ARM::FPV5_D16, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
-                        ARM::FeatureFPARMv8 | ARM::FeatureD16,
-     ARM::FeatureNEON | ARM::FeatureCrypto},
-    {ARM::FP_ARMV8, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
-                        ARM::FeatureFPARMv8,
-     ARM::FeatureNEON | ARM::FeatureCrypto},
-    {ARM::NEON, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureNEON, 0},
-    {ARM::NEON_VFPV4,
-     ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | ARM::FeatureNEON,
-     0},
-    {ARM::NEON_FP_ARMV8,
-     ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
+    {/* ID */ ARM::VFP,
+     /* Enabled */ ARM::FeatureVFP2,
+     /* Disabled */ ARM::FeatureNEON},
+    {/* ID */ ARM::VFPV2,
+     /* Enabled */ ARM::FeatureVFP2,
+     /* Disabled */ ARM::FeatureNEON},
+    {/* ID */ ARM::VFPV3,
+     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3,
+     /* Disabled */ ARM::FeatureNEON | ARM::FeatureD16},
+    {/* ID */ ARM::VFPV3_D16,
+     /* Enable */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureD16,
+     /* Disabled */ ARM::FeatureNEON},
+    {/* ID */ ARM::VFPV4,
+     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4,
+     /* Disabled */ ARM::FeatureNEON | ARM::FeatureD16},
+    {/* ID */ ARM::VFPV4_D16,
+     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
+         ARM::FeatureD16,
+     /* Disabled */ ARM::FeatureNEON},
+    {/* ID */ ARM::FPV5_D16,
+     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
+         ARM::FeatureFPARMv8 | ARM::FeatureD16,
+     /* Disabled */ ARM::FeatureNEON | ARM::FeatureCrypto},
+    {/* ID */ ARM::FP_ARMV8,
+     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
+         ARM::FeatureFPARMv8,
+     /* Disabled */ ARM::FeatureNEON | ARM::FeatureCrypto | ARM::FeatureD16},
+    {/* ID */ ARM::NEON,
+     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureNEON,
+     /* Disabled */ ARM::FeatureD16},
+    {/* ID */ ARM::NEON_VFPV4,
+     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
+         ARM::FeatureNEON,
+     /* Disabled */ ARM::FeatureD16},
+    {/* ID */ ARM::NEON_FP_ARMV8,
+     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
         ARM::FeatureFPARMv8 | ARM::FeatureNEON,
-     ARM::FeatureCrypto},
-    {ARM::CRYPTO_NEON_FP_ARMV8,
-     ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
+     /* Disabled */ ARM::FeatureCrypto | ARM::FeatureD16},
+    {/* ID */ ARM::CRYPTO_NEON_FP_ARMV8,
+     /* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
         ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto,
-     0},
+     /* Disabled */ ARM::FeatureD16},
     {ARM::SOFTVFP, 0, 0},
 };
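Each FPUs[] entry now labels its role, and every row describing an FPU with 32 D-registers lists FeatureD16 under Disabled. The .fpu directive handler is expected to apply a row roughly like this (a paraphrase of the table's semantics, not the parser's actual code):

    // Sketch: applying one FPUs[] row to the subtarget feature bits.
    static uint64_t applyFPUEntry(uint64_t Features, uint64_t Enabled,
                                  uint64_t Disabled) {
      Features |= Enabled;   // everything the FPU provides
      Features &= ~Disabled; // everything the FPU rules out
      return Features;
    }

Listing FeatureD16 in Disabled matters when switching from a d16 FPU to a 32-register one: without it, the stale restriction would survive the .fpu change.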
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index d597cdc..ad3bc1d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3134,7 +3134,8 @@ def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
 def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src),
                     "icbi $src", IIC_LdStICBI, []>;
 
-def EIEIO : XForm_24_eieio<31, 854, (outs), (ins),
+// We used to have EIEIO as value but E[0-9A-Z] is a reserved name
+def EnforceIEIO : XForm_24_eieio<31, 854, (outs), (ins),
                            "eieio", IIC_LdStLoad, []>;
 
 def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L),
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
index e7bc006..0bc62d0 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
@@ -100,7 +100,7 @@ bool AMDGPUTTI::hasBranchDivergence() const { return true; }
 void AMDGPUTTI::getUnrollingPreferences(const Function *, Loop *L,
                                         UnrollingPreferences &UP) const {
   UP.Threshold = 300; // Twice the default.
-  UP.Count = UINT_MAX;
+  UP.MaxCount = UINT_MAX;
   UP.Partial = true;
 
   // TODO: Do we want runtime unrolling?
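The AMDGPU fix is one word but changes meaning: in the unroller's UnrollingPreferences, Count requests a specific unroll factor, while MaxCount only caps whatever factor the cost model chooses. A hedged sketch of the corrected intent, assuming the LLVM 3.6-era TTI interface:

    void getUnrollingPreferencesSketch(
        TargetTransformInfo::UnrollingPreferences &UP) {
      UP.Threshold = 300;     // let bigger loops unroll (twice the default)
      UP.MaxCount = UINT_MAX; // leave the chosen factor uncapped...
      UP.Partial = true;      // ...and allow partial unrolling
      // The old UP.Count = UINT_MAX *demanded* an enormous unroll factor
      // rather than merely removing the upper bound.
    }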
diff --git a/contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp b/contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp
index 91eb60b..c99219d 100644
--- a/contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp
+++ b/contrib/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp
@@ -14,6 +14,7 @@
 
 #include "AMDGPU.h"
 #include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
@@ -66,6 +67,8 @@ class SIAnnotateControlFlow : public FunctionPass {
   DominatorTree *DT;
   StackVector Stack;
 
+  LoopInfo *LI;
+
   bool isTopOfStack(BasicBlock *BB);
 
   Value *popSaved();
@@ -99,6 +102,7 @@ public:
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<LoopInfo>();
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addPreserved<DominatorTreeWrapperPass>();
     FunctionPass::getAnalysisUsage(AU);
@@ -277,10 +281,25 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
   Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
   push(Term->getSuccessor(0), Arg);
-}
-
-/// \brief Close the last opened control flow
+}/// \brief Close the last opened control flow
 void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
+  llvm::Loop *L = LI->getLoopFor(BB);
+
+  if (L && L->getHeader() == BB) {
+    // We can't insert an EndCF call into a loop header, because it will
+    // get executed on every iteration of the loop, when it should be
+    // executed only once before the loop.
+    SmallVector <BasicBlock*, 8> Latches;
+    L->getLoopLatches(Latches);
+
+    std::vector<BasicBlock*> Preds;
+    for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+      if (std::find(Latches.begin(), Latches.end(), *PI) == Latches.end())
+        Preds.push_back(*PI);
+    }
+    BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", this);
+  }
+
   CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt());
 }
 
@@ -288,6 +307,7 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
 /// recognize if/then/else and loops.
 bool SIAnnotateControlFlow::runOnFunction(Function &F) {
   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  LI = &getAnalysis<LoopInfo>();
 
   for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
        E = df_end(&F.getEntryBlock()); I != E; ++I) {
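Why the split is needed, sketched as control flow (an illustration of the intent, not output from a real run): the EndCF call must execute once on the edge entering the loop region, not once per iteration.

    // Before: closeControlFlow(header) would put EndCF inside the loop.
    //
    //   entry --> header <-- latch      EndCF in header would run on
    //               |_________|         every trip around the loop.
    //
    // After SplitBlockPredecessors(header, {entry}, "endcf.split", this):
    //
    //   entry --> endcf.split --> header <-- latch
    //
    // EndCF lands in endcf.split, which executes exactly once; the latch
    // edge re-enters the header without passing through it.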
diff --git a/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp
index 0396bf3..58c2cd1 100644
--- a/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp
@@ -266,6 +266,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       break;
     case AMDGPU::SI_SPILL_V32_RESTORE:
     case AMDGPU::SI_SPILL_V64_RESTORE:
+    case AMDGPU::SI_SPILL_V96_RESTORE:
    case AMDGPU::SI_SPILL_V128_RESTORE:
     case AMDGPU::SI_SPILL_V256_RESTORE:
     case AMDGPU::SI_SPILL_V512_RESTORE: {
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
index ab3319a..30b3b28 100644
--- a/contrib/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -132,9 +132,9 @@ def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
 def FeatureXOP     : SubtargetFeature<"xop", "HasXOP", "true",
                                       "Enable XOP instructions",
                                       [FeatureFMA4]>;
-def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
-                                          "HasVectorUAMem", "true",
-                 "Allow unaligned memory operands on vector/SIMD instructions">;
+def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
+                                              "HasSSEUnalignedMem", "true",
+                      "Allow unaligned memory operands with SSE instructions">;
 def FeatureAES     : SubtargetFeature<"aes", "HasAES", "true",
                                       "Enable AES instructions",
                                       [FeatureSSE2]>;
@@ -309,7 +309,6 @@ class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
   FeatureCMPXCHG16B,
   FeatureFastUAMem,
   FeatureSlowUAMem32,
-  FeatureVectorUAMem,
   FeaturePOPCNT,
   FeatureAES,
   FeaturePCLMUL
@@ -322,7 +321,6 @@ class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
   FeatureCMPXCHG16B,
   FeatureFastUAMem,
   FeatureSlowUAMem32,
-  FeatureVectorUAMem,
   FeaturePOPCNT,
   FeatureAES,
   FeaturePCLMUL,
@@ -337,7 +335,6 @@ class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
   FeatureAVX2,
   FeatureCMPXCHG16B,
   FeatureFastUAMem,
-  FeatureVectorUAMem,
   FeaturePOPCNT,
   FeatureAES,
   FeaturePCLMUL,
@@ -360,7 +357,6 @@ class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
   FeatureAVX2,
   FeatureCMPXCHG16B,
   FeatureFastUAMem,
-  FeatureVectorUAMem,
   FeaturePOPCNT,
   FeatureAES,
   FeaturePCLMUL,
@@ -388,7 +384,7 @@ class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,
                       FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
                       FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
                       FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
-                      FeatureSlowIncDec, FeatureVectorUAMem]>;
+                      FeatureSlowIncDec]>;
 def : KnightsLandingProc<"knl">;
 
 // FIXME: define SKX model
@@ -399,7 +395,7 @@ class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel,
                       FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
                       FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
                       FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
-                      FeatureSlowIncDec, FeatureSGX, FeatureVectorUAMem]>;
+                      FeatureSlowIncDec, FeatureSGX]>;
 def : SkylakeProc<"skylake">;
 def : SkylakeProc<"skx">; // Legacy alias.
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 14a0f38..d06e94f 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -688,11 +688,11 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
     std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
 
     for (const auto &Function : M)
-      if (Function.hasDLLExportStorageClass())
+      if (Function.hasDLLExportStorageClass() && !Function.isDeclaration())
         DLLExportedFns.push_back(getSymbol(&Function));
 
     for (const auto &Global : M.globals())
-      if (Global.hasDLLExportStorageClass())
+      if (Global.hasDLLExportStorageClass() && !Global.isDeclaration())
         DLLExportedGlobals.push_back(getSymbol(&Global));
 
     for (const auto &Alias : M.aliases()) {
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index a1fd34e..78a11e6 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5473,6 +5473,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
 
     if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
       DecodePSHUFBMask(C, Mask);
+      if (Mask.empty())
+        return false;
       break;
     }
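The two added lines in getTargetShuffleMask turn a silent decode failure into an explicit one: DecodePSHUFBMask can leave Mask empty when it cannot interpret the constant pool entry. A hedged sketch of the caller-side contract this protects (paraphrasing the function's use, not quoting a specific call site):

    // A false return means "no usable mask": shuffle combining must bail
    // instead of indexing into an empty mask.
    SmallVector<int, 16> Mask;
    bool IsUnary;
    if (!getTargetShuffleMask(N, VT, Mask, IsUnary))
      return SDValue();  // cannot reason about this shuffle
    int FirstElt = Mask[0]; // safe only after the check above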
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 76e8fad..7069bd6 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -424,7 +424,7 @@ def alignedloadv8i64 : PatFrag<(ops node:$ptr),
 // setting a feature bit in the processor (on startup, for example).
 // Opteron 10h and later implement such a feature.
 def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return Subtarget->hasVectorUAMem()
+  return Subtarget->hasSSEUnalignedMem()
       || cast<LoadSDNode>(N)->getAlignment() >= 16;
 }]>;
 
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
index e59395c..9831698 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -265,7 +265,7 @@ void X86Subtarget::initializeEnvironment() {
   IsSHLDSlow = false;
   IsUAMemFast = false;
   IsUAMem32Slow = false;
-  HasVectorUAMem = false;
+  HasSSEUnalignedMem = false;
   HasCmpxchg16b = false;
   UseLeaForSP = false;
   HasSlowDivide32 = false;
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h
index 754b5b9..e0263d6 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h
@@ -162,9 +162,9 @@ protected:
   /// True if unaligned 32-byte memory accesses are slow.
   bool IsUAMem32Slow;
 
-  /// HasVectorUAMem - True if SIMD operations can have unaligned memory
-  /// operands. This may require setting a feature bit in the processor.
-  bool HasVectorUAMem;
+  /// True if SSE operations can have unaligned memory operands.
+  /// This may require setting a configuration bit in the processor.
+  bool HasSSEUnalignedMem;
 
   /// HasCmpxchg16b - True if this processor has the CMPXCHG16B instruction;
   /// this is true for most x86-64 chips, but not the first AMD chips.
@@ -378,7 +378,7 @@ public:
   bool isSHLDSlow() const { return IsSHLDSlow; }
   bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
   bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
-  bool hasVectorUAMem() const { return HasVectorUAMem; }
+  bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
   bool hasCmpxchg16b() const { return HasCmpxchg16b; }
   bool useLeaForSP() const { return UseLeaForSP; }
   bool hasSlowDivide32() const { return HasSlowDivide32; }
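Taken together, the X86 pieces thread one renamed knob end to end: X86.td defines the subtarget feature, X86Subtarget carries the bit, and the memop pattern fragment consumes it. The consumer's logic, restated as plain C++ (a paraphrase of the PatFrag predicate above, not new API):

    // When may instruction selection fold a vector load directly into an
    // SSE instruction's memory operand?
    static bool canFoldSSELoad(const X86Subtarget *Subtarget,
                               const LoadSDNode *Ld) {
      // Either the load is 16-byte aligned, or this CPU promises that SSE
      // instructions tolerate unaligned operands (sse-unaligned-mem); the
      // comment in the .td file cites Opteron 10h's misaligned-SSE mode.
      return Subtarget->hasSSEUnalignedMem() || Ld->getAlignment() >= 16;
    }

Dropping FeatureVectorUAMem from the Intel CPU models narrows this predicate to targets that explicitly opt in, rather than asserting it for cores where legacy (non-VEX) SSE encodings still fault on unaligned operands.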