summary | refs | log | tree | commit | diff | stats
path: root/contrib/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib')
-rw-r--r-- contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp 18
-rw-r--r-- contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h 2
-rw-r--r-- contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 16
-rw-r--r-- contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp 25
-rw-r--r-- contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 1
-rw-r--r-- contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 62
-rw-r--r-- contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 13
-rw-r--r-- contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 6
-rw-r--r-- contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp (renamed from contrib/llvm/lib/ExecutionEngine/RTDyldMemoryManager.cpp) 0
-rw-r--r-- contrib/llvm/lib/IR/Constants.cpp 23
-rw-r--r-- contrib/llvm/lib/Target/X86/X86ISelLowering.cpp 164
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp 10
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp 62
-rw-r--r-- contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 2
14 files changed, 333 insertions, 71 deletions
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 409adaf..3d9546f 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -538,9 +538,17 @@ Metadata *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) {
if (Metadata *MD = MDValuePtrs[Idx])
return MD;
- // Create and return a placeholder, which will later be RAUW'd.
- AnyFwdRefs = true;
+ // Track forward refs to be resolved later.
+ if (AnyFwdRefs) {
+ MinFwdRef = std::min(MinFwdRef, Idx);
+ MaxFwdRef = std::max(MaxFwdRef, Idx);
+ } else {
+ AnyFwdRefs = true;
+ MinFwdRef = MaxFwdRef = Idx;
+ }
++NumFwdRefs;
+
+ // Create and return a placeholder, which will later be RAUW'd.
Metadata *MD = MDNode::getTemporary(Context, None);
MDValuePtrs[Idx].reset(MD);
return MD;
@@ -556,11 +564,15 @@ void BitcodeReaderMDValueList::tryToResolveCycles() {
return;
// Resolve any cycles.
- for (auto &MD : MDValuePtrs) {
+ for (unsigned I = MinFwdRef, E = MaxFwdRef + 1; I != E; ++I) {
+ auto &MD = MDValuePtrs[I];
assert(!(MD && isa<MDNodeFwdDecl>(MD)) && "Unexpected forward reference");
if (auto *N = dyn_cast_or_null<UniquableMDNode>(MD))
N->resolveCycles();
}
+
+ // Make sure we return early again until there's another forward ref.
+ AnyFwdRefs = false;
}
Type *BitcodeReader::getTypeByID(unsigned ID) {
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h
index 7f7eb70..5090be4 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h
@@ -99,6 +99,8 @@ public:
class BitcodeReaderMDValueList {
unsigned NumFwdRefs;
bool AnyFwdRefs;
+ unsigned MinFwdRef;
+ unsigned MaxFwdRef;
std::vector<TrackingMDRef> MDValuePtrs;
LLVMContext &Context;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1bd6cff..afb986f 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4842,7 +4842,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
- SDValue Data = MST->getData();
+ SDValue Data = MST->getValue();
SDLoc DL(N);
// If the MSTORE data type requires splitting and the mask is provided by a
@@ -4885,7 +4885,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, MST->getAAInfo(), MST->getRanges());
- Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, MMO);
+ Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
+ MST->isTruncatingStore());
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
@@ -4897,7 +4898,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
SecondHalfAlignment, MST->getAAInfo(),
MST->getRanges());
- Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, MMO);
+ Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
+ MST->isTruncatingStore());
AddToWorklist(Lo.getNode());
AddToWorklist(Hi.getNode());
@@ -4958,7 +4960,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
- Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, MMO);
+ Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
+ ISD::NON_EXTLOAD);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
@@ -4969,7 +4972,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
- Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, MMO);
+ Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
+ ISD::NON_EXTLOAD);
AddToWorklist(Lo.getNode());
AddToWorklist(Hi.getNode());
@@ -9482,6 +9486,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
unsigned NewBW = NextPowerOf2(MSB - ShAmt);
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+ // The narrowing should be profitable, the load/store operation should be
+ // legal (or custom) and the store size should be equal to the NewVT width.
while (NewBW < BitWidth &&
!(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
TLI.isNarrowingProfitable(VT, NewVT))) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 82b114b..a4e44cc 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -458,16 +458,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0());
- SDValue ExtMask = PromoteTargetBoolean(N->getMask(), NVT);
- SDLoc dl(N);
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(N->getPointerInfo(),
- MachineMemOperand::MOLoad, NVT.getStoreSize(),
- N->getAlignment(), N->getAAInfo(), N->getRanges());
+ SDValue Mask = N->getMask();
+ EVT NewMaskVT = getSetCCResultType(NVT);
+ if (NewMaskVT != N->getMask().getValueType())
+ Mask = PromoteTargetBoolean(Mask, NewMaskVT);
+ SDLoc dl(N);
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
- ExtMask, ExtSrc0, MMO);
+ Mask, ExtSrc0, N->getMemoryVT(),
+ N->getMemOperand(), ISD::SEXTLOAD);
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -1117,16 +1117,18 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){
assert(OpNo == 2 && "Only know how to promote the mask!");
- SDValue DataOp = N->getData();
+ SDValue DataOp = N->getValue();
EVT DataVT = DataOp.getValueType();
SDValue Mask = N->getMask();
EVT MaskVT = Mask.getValueType();
SDLoc dl(N);
+ bool TruncateStore = false;
if (!TLI.isTypeLegal(DataVT)) {
if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) {
DataOp = GetPromotedInteger(DataOp);
Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
+ TruncateStore = true;
}
else {
assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector &&
@@ -1156,10 +1158,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpN
}
else
Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType());
- SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
- NewOps[2] = Mask;
- NewOps[3] = DataOp;
- return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+ return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask,
+ N->getMemoryVT(), N->getMemOperand(),
+ TruncateStore);
}
SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 1cd9f40..cef3fc9 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -659,6 +659,7 @@ private:
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
+ SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
SDValue WidenVecOp_Convert(SDNode *N);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 96b69ee..63671f7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -992,6 +992,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
SDValue Ptr = MLD->getBasePtr();
SDValue Mask = MLD->getMask();
unsigned Alignment = MLD->getOriginalAlignment();
+ ISD::LoadExtType ExtType = MLD->getExtensionType();
// if Alignment is equal to the vector size,
// take the half of it for the second part
@@ -1015,7 +1016,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
- Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, MMO);
+ Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
+ ExtType);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
@@ -1026,7 +1028,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
- Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, MMO);
+ Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
+ ExtType);
// Build a factor node to remember that this load is independent of the
@@ -1464,7 +1467,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
SDValue Mask = N->getMask();
- SDValue Data = N->getData();
+ SDValue Data = N->getValue();
EVT MemoryVT = N->getMemoryVT();
unsigned Alignment = N->getOriginalAlignment();
SDLoc DL(N);
@@ -1489,7 +1492,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
- Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, MMO);
+ Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
+ N->isTruncatingStore());
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
@@ -1500,7 +1504,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
SecondHalfAlignment, N->getAAInfo(), N->getRanges());
- Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, MMO);
+ Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
+ N->isTruncatingStore());
// Build a factor node to remember that this store is independent of the
@@ -2412,6 +2417,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
SDValue Mask = N->getMask();
EVT MaskVT = Mask.getValueType();
SDValue Src0 = GetWidenedVector(N->getSrc0());
+ ISD::LoadExtType ExtType = N->getExtensionType();
SDLoc dl(N);
if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
@@ -2434,14 +2440,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
}
- // Rebuild memory operand because MemoryVT was changed
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(N->getPointerInfo(),
- MachineMemOperand::MOLoad, WidenVT.getStoreSize(),
- N->getAlignment(), N->getAAInfo(), N->getRanges());
-
SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
- Mask, Src0, MMO);
+ Mask, Src0, N->getMemoryVT(),
+ N->getMemOperand(), ExtType);
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -2593,6 +2594,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+ case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
case ISD::ANY_EXTEND:
@@ -2791,6 +2793,42 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
}
+SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
+ MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
+ SDValue Mask = MST->getMask();
+ EVT MaskVT = Mask.getValueType();
+ SDValue StVal = MST->getValue();
+ // Widen the value
+ SDValue WideVal = GetWidenedVector(StVal);
+ SDLoc dl(N);
+
+ if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
+ Mask = GetWidenedVector(Mask);
+ else {
+ // The mask should be widened as well
+ EVT BoolVT = getSetCCResultType(WideVal.getValueType());
+ // We can't use ModifyToType() because we should fill the mask with
+ // zeroes
+ unsigned WidenNumElts = BoolVT.getVectorNumElements();
+ unsigned MaskNumElts = MaskVT.getVectorNumElements();
+
+ unsigned NumConcat = WidenNumElts / MaskNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue ZeroVal = DAG.getConstant(0, MaskVT);
+ Ops[0] = Mask;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = ZeroVal;
+
+ Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
+ }
+ assert(Mask.getValueType().getVectorNumElements() ==
+ WideVal.getValueType().getVectorNumElements() &&
+ "Mask and data vectors should have the same number of elements");
+ return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
+ Mask, MST->getMemoryVT(), MST->getMemOperand(),
+ false);
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
SDValue InOp0 = GetWidenedVector(N->getOperand(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c819516..f75d5f4 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4924,15 +4924,15 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
SDValue
SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
- SDValue Ptr, SDValue Mask, SDValue Src0,
- MachineMemOperand *MMO) {
+ SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT,
+ MachineMemOperand *MMO, ISD::LoadExtType ExtTy) {
SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
ID.AddInteger(VT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED,
+ ID.AddInteger(encodeMemSDNodeFlags(ExtTy, ISD::UNINDEXED,
MMO->isVolatile(),
MMO->isNonTemporal(),
MMO->isInvariant()));
@@ -4944,14 +4944,15 @@ SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
}
SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(),
dl.getDebugLoc(), Ops, 4, VTs,
- VT, MMO);
+ ExtTy, MemVT, MMO);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
- SDValue Ptr, SDValue Mask, MachineMemOperand *MMO) {
+ SDValue Ptr, SDValue Mask, EVT MemVT,
+ MachineMemOperand *MMO, bool isTrunc) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
EVT VT = Val.getValueType();
@@ -4970,7 +4971,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
}
SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(),
dl.getDebugLoc(), Ops, 4,
- VTs, VT, MMO);
+ VTs, isTrunc, MemVT, MMO);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 151bc72..d192910 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3667,7 +3667,8 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
getMachineMemOperand(MachinePointerInfo(PtrOperand),
MachineMemOperand::MOStore, VT.getStoreSize(),
Alignment, AAInfo);
- SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, MMO);
+ SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
+ MMO, false);
DAG.setRoot(StoreNode);
setValue(&I, StoreNode);
}
@@ -3706,7 +3707,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
MachineMemOperand::MOLoad, VT.getStoreSize(),
Alignment, AAInfo, Ranges);
- SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, MMO);
+ SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
+ ISD::NON_EXTLOAD);
SDValue OutChain = Load.getValue(1);
DAG.setRoot(OutChain);
setValue(&I, Load);
diff --git a/contrib/llvm/lib/ExecutionEngine/RTDyldMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
index 2a5e4f8..2a5e4f8 100644
--- a/contrib/llvm/lib/ExecutionEngine/RTDyldMemoryManager.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
diff --git a/contrib/llvm/lib/IR/Constants.cpp b/contrib/llvm/lib/IR/Constants.cpp
index 1d2602a..44052b2 100644
--- a/contrib/llvm/lib/IR/Constants.cpp
+++ b/contrib/llvm/lib/IR/Constants.cpp
@@ -257,11 +257,11 @@ Constant *Constant::getAggregateElement(unsigned Elt) const {
if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
return Elt < CV->getNumOperands() ? CV->getOperand(Elt) : nullptr;
- if (const ConstantAggregateZero *CAZ =dyn_cast<ConstantAggregateZero>(this))
- return CAZ->getElementValue(Elt);
+ if (const ConstantAggregateZero *CAZ = dyn_cast<ConstantAggregateZero>(this))
+ return Elt < CAZ->getNumElements() ? CAZ->getElementValue(Elt) : nullptr;
if (const UndefValue *UV = dyn_cast<UndefValue>(this))
- return UV->getElementValue(Elt);
+ return Elt < UV->getNumElements() ? UV->getElementValue(Elt) : nullptr;
if (const ConstantDataSequential *CDS =dyn_cast<ConstantDataSequential>(this))
return Elt < CDS->getNumElements() ? CDS->getElementAsConstant(Elt)
@@ -764,6 +764,14 @@ Constant *ConstantAggregateZero::getElementValue(unsigned Idx) const {
return getStructElement(Idx);
}
+unsigned ConstantAggregateZero::getNumElements() const {
+ const Type *Ty = getType();
+ if (const auto *AT = dyn_cast<ArrayType>(Ty))
+ return AT->getNumElements();
+ if (const auto *VT = dyn_cast<VectorType>(Ty))
+ return VT->getNumElements();
+ return Ty->getStructNumElements();
+}
//===----------------------------------------------------------------------===//
// UndefValue Implementation
@@ -797,7 +805,14 @@ UndefValue *UndefValue::getElementValue(unsigned Idx) const {
return getStructElement(Idx);
}
-
+unsigned UndefValue::getNumElements() const {
+ const Type *Ty = getType();
+ if (const auto *AT = dyn_cast<ArrayType>(Ty))
+ return AT->getNumElements();
+ if (const auto *VT = dyn_cast<VectorType>(Ty))
+ return VT->getNumElements();
+ return Ty->getStructNumElements();
+}
//===----------------------------------------------------------------------===//
// ConstantXXX Classes
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index 78a11e6..177299b 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1679,7 +1679,9 @@ void X86TargetLowering::resetOperationActions() {
setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::LOAD);
+ setTargetDAGCombine(ISD::MLOAD);
setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::MSTORE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
@@ -24738,6 +24740,166 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// PerformMLOADCombine - Resolve extending loads
+static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ MaskedLoadSDNode *Mld = cast<MaskedLoadSDNode>(N);
+ if (Mld->getExtensionType() != ISD::SEXTLOAD)
+ return SDValue();
+
+ EVT VT = Mld->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned NumElems = VT.getVectorNumElements();
+ EVT LdVT = Mld->getMemoryVT();
+ SDLoc dl(Mld);
+
+ assert(LdVT != VT && "Cannot extend to the same type");
+ unsigned ToSz = VT.getVectorElementType().getSizeInBits();
+ unsigned FromSz = LdVT.getVectorElementType().getSizeInBits();
+ // From, To sizes and ElemCount must be powers of two
+ assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
+ "Unexpected size for extending masked load");
+
+ unsigned SizeRatio = ToSz / FromSz;
+ assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits());
+
+ // Create a type on which we perform the shuffle
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
+ LdVT.getScalarType(), NumElems*SizeRatio);
+ assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ // Convert Src0 value
+ SDValue WideSrc0 = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mld->getSrc0());
+ if (Mld->getSrc0().getOpcode() != ISD::UNDEF) {
+ SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i != NumElems; ++i)
+ ShuffleVec[i] = i * SizeRatio;
+
+ // Can't shuffle using an illegal type.
+ assert (TLI.isTypeLegal(WideVecVT) && "WideVecVT should be legal");
+ WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0,
+ DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
+ }
+ // Prepare the new mask
+ SDValue NewMask;
+ SDValue Mask = Mld->getMask();
+ if (Mask.getValueType() == VT) {
+ // Mask and original value have the same type
+ NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
+ SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i != NumElems; ++i)
+ ShuffleVec[i] = i * SizeRatio;
+ for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
+ ShuffleVec[i] = NumElems*SizeRatio;
+ NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
+ DAG.getConstant(0, WideVecVT),
+ &ShuffleVec[0]);
+ }
+ else {
+ assert(Mask.getValueType().getVectorElementType() == MVT::i1);
+ unsigned WidenNumElts = NumElems*SizeRatio;
+ unsigned MaskNumElts = VT.getVectorNumElements();
+ EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ WidenNumElts);
+
+ unsigned NumConcat = WidenNumElts / MaskNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType());
+ Ops[0] = Mask;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = ZeroVal;
+
+ NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
+ }
+
+ SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(),
+ Mld->getBasePtr(), NewMask, WideSrc0,
+ Mld->getMemoryVT(), Mld->getMemOperand(),
+ ISD::NON_EXTLOAD);
+ SDValue NewVec = DAG.getNode(X86ISD::VSEXT, dl, VT, WideLd);
+ return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
+
+}
+/// PerformMSTORECombine - Resolve truncating stores
+static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
+ if (!Mst->isTruncatingStore())
+ return SDValue();
+
+ EVT VT = Mst->getValue().getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned NumElems = VT.getVectorNumElements();
+ EVT StVT = Mst->getMemoryVT();
+ SDLoc dl(Mst);
+
+ assert(StVT != VT && "Cannot truncate to the same type");
+ unsigned FromSz = VT.getVectorElementType().getSizeInBits();
+ unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
+
+ // From, To sizes and ElemCount must be powers of two
+ assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
+ "Unexpected size for truncating masked store");
+ // We are going to use the original vector elt for storing.
+ // Accumulated smaller vector elements must be a multiple of the store size.
+ assert (((NumElems * FromSz) % ToSz) == 0 &&
+ "Unexpected ratio for truncating masked store");
+
+ unsigned SizeRatio = FromSz / ToSz;
+ assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
+
+ // Create a type on which we perform the shuffle
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StVT.getScalarType(), NumElems*SizeRatio);
+
+ assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mst->getValue());
+ SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i != NumElems; ++i)
+ ShuffleVec[i] = i * SizeRatio;
+
+ // Can't shuffle using an illegal type.
+ assert (TLI.isTypeLegal(WideVecVT) && "WideVecVT should be legal");
+
+ SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
+ DAG.getUNDEF(WideVecVT),
+ &ShuffleVec[0]);
+
+ SDValue NewMask;
+ SDValue Mask = Mst->getMask();
+ if (Mask.getValueType() == VT) {
+ // Mask and original value have the same type
+ NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
+ for (unsigned i = 0; i != NumElems; ++i)
+ ShuffleVec[i] = i * SizeRatio;
+ for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
+ ShuffleVec[i] = NumElems*SizeRatio;
+ NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
+ DAG.getConstant(0, WideVecVT),
+ &ShuffleVec[0]);
+ }
+ else {
+ assert(Mask.getValueType().getVectorElementType() == MVT::i1);
+ unsigned WidenNumElts = NumElems*SizeRatio;
+ unsigned MaskNumElts = VT.getVectorNumElements();
+ EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ WidenNumElts);
+
+ unsigned NumConcat = WidenNumElts / MaskNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType());
+ Ops[0] = Mask;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = ZeroVal;
+
+ NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
+ }
+
+ return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal, Mst->getBasePtr(),
+ NewMask, StVT, Mst->getMemOperand(), false);
+}
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
@@ -25836,7 +25998,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget);
+ case ISD::MLOAD: return PerformMLOADCombine(N, DAG, DCI, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
+ case ISD::MSTORE: return PerformMSTORECombine(N, DAG, Subtarget);
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
diff --git a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 8509713..1f73cbc 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -403,7 +403,7 @@ bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction& Start,
const Instruction& End,
AliasAnalysis::Location
Loc) {
- return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::Ref);
+ return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::ModRef);
}
///
@@ -414,6 +414,7 @@ bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction& Start,
StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
StoreInst *Store0) {
DEBUG(dbgs() << "can Sink? : "; Store0->dump(); dbgs() << "\n");
+ BasicBlock *BB0 = Store0->getParent();
for (BasicBlock::reverse_iterator RBI = BB1->rbegin(), RBE = BB1->rend();
RBI != RBE; ++RBI) {
Instruction *Inst = &*RBI;
@@ -422,13 +423,14 @@ StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
continue;
StoreInst *Store1 = cast<StoreInst>(Inst);
- BasicBlock *BB0 = Store0->getParent();
AliasAnalysis::Location Loc0 = AA->getLocation(Store0);
AliasAnalysis::Location Loc1 = AA->getLocation(Store1);
if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
- !isStoreSinkBarrierInRange(*Store1, BB1->back(), Loc1) &&
- !isStoreSinkBarrierInRange(*Store0, BB0->back(), Loc0)) {
+ !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store1))),
+ BB1->back(), Loc1) &&
+ !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store0))),
+ BB0->back(), Loc0)) {
return Store1;
}
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index f12cd61..8a32215 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -55,7 +55,7 @@ STATISTIC(NumRuntimeUnrolled,
/// - Branch around the original loop if the trip count is less
/// than the unroll factor.
///
-static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
+static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
BasicBlock *OrigPH, BasicBlock *NewPH,
ValueToValueMapTy &VMap, Pass *P) {
@@ -105,12 +105,19 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
}
}
- // Create a branch around the orignal loop, which is taken if the
- // trip count is less than the unroll factor.
+ // Create a branch around the original loop, which is taken if there are no
+ // iterations remaining to be executed after running the prologue.
Instruction *InsertPt = PrologEnd->getTerminator();
+
+ assert(Count != 0 && "nonsensical Count!");
+
+ // If BECount <u (Count - 1) then (BECount + 1) & (Count - 1) == (BECount + 1)
+ // (since Count is a power of 2). This means %xtraiter is (BECount + 1) and
+ // all of the iterations of this loop were executed by the prologue. Note
+ // that if BECount <u (Count - 1) then (BECount + 1) cannot unsigned-overflow.
Instruction *BrLoopExit =
- new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
- ConstantInt::get(TripCount->getType(), Count));
+ new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, BECount,
+ ConstantInt::get(BECount->getType(), Count - 1));
BasicBlock *Exit = L->getUniqueExitBlock();
assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
@@ -292,23 +299,28 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
// Only unroll loops with a computable trip count and the trip count needs
// to be an int value (allowing a pointer type is a TODO item)
- const SCEV *BECount = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
+ const SCEV *BECountSC = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BECountSC) ||
+ !BECountSC->getType()->isIntegerTy())
return false;
- // If BECount is INT_MAX, we can't compute trip-count without overflow.
- if (BECount->isAllOnesValue())
- return false;
+ unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
// Add 1 since the backedge count doesn't include the first loop iteration
const SCEV *TripCountSC =
- SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
+ SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
if (isa<SCEVCouldNotCompute>(TripCountSC))
return false;
// We only handle cases when the unroll factor is a power of 2.
// Count is the loop unroll factor, the number of extra copies added + 1.
- if ((Count & (Count-1)) != 0)
+ if (!isPowerOf2_32(Count))
+ return false;
+
+ // This constraint lets us deal with an overflowing trip count easily; see the
+ // comment on ModVal below. This check is equivalent to `Log2(Count) <=
+ // BEWidth`.
+ if (static_cast<uint64_t>(Count) > (1ULL << BEWidth))
return false;
// If this loop is nested, then the loop unroller changes the code in
@@ -330,16 +342,23 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
SCEVExpander Expander(*SE, "loop-unroll");
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
PreHeaderBR);
+ Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
+ PreHeaderBR);
IRBuilder<> B(PreHeaderBR);
Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
- // Check if for no extra iterations, then jump to cloned/unrolled loop.
- // We have to check that the trip count computation didn't overflow when
- // adding one to the backedge taken count.
- Value *LCmp = B.CreateIsNotNull(ModVal, "lcmp.mod");
- Value *OverflowCheck = B.CreateIsNull(TripCount, "lcmp.overflow");
- Value *BranchVal = B.CreateOr(OverflowCheck, LCmp, "lcmp.or");
+ // If ModVal is zero, we know that either
+ // 1. there are no iterations to be run in the prologue loop
+ // OR
+ // 2. the addition computing TripCount overflowed
+ //
+ // If (2) is true, we know that TripCount really is (1 << BEWidth) and so the
+ // number of iterations that remain to be run in the original loop is a
+ // multiple of Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
+ // explicitly check this above).
+
+ Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");
// Branch to either the extra iterations or the cloned/unrolled loop
// We will fix up the true branch label when adding loop body copies
@@ -362,10 +381,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
std::vector<BasicBlock *> NewBlocks;
ValueToValueMapTy VMap;
- // If unroll count is 2 and we can't overflow in tripcount computation (which
- // is BECount + 1), then we don't need a loop for prologue, and we can unroll
- // it. We can be sure that we don't overflow only if tripcount is a constant.
- bool UnrollPrologue = (Count == 2 && isa<ConstantInt>(TripCount));
+ bool UnrollPrologue = Count == 2;
// Clone all the basic blocks in the loop. If Count is 2, we don't clone
// the loop, otherwise we create a cloned loop to execute the extra
@@ -391,7 +407,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
// Connect the prolog code to the original loop and update the
// PHI functions.
BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
- ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, VMap,
+ ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap,
LPM->getAsPass());
NumRuntimeUnrolled++;
return true;
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 557304e..47b92a3 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1874,6 +1874,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
// wide store needs to start at the last vector element.
PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+ Mask[Part] = reverseVector(Mask[Part]);
}
Value *VecPtr = Builder.CreateBitCast(PartPtr,
@@ -1902,6 +1903,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
// wide load needs to start at the last vector element.
PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+ Mask[Part] = reverseVector(Mask[Part]);
}
Instruction* NewLI;
OpenPOWER on IntegriCloud