diff options
author | dim <dim@FreeBSD.org> | 2015-12-25 21:39:45 +0000 |
---|---|---|
committer | dim <dim@FreeBSD.org> | 2015-12-25 21:39:45 +0000 |
commit | 6f44a590dad07c47cdc2fc19861574af1da36963 (patch) | |
tree | 3570671d3fc79afee8f95c67867446e3151a9d94 /contrib | |
parent | 0efa1469be94566c09b9f4ce538c28e92d26026c (diff) | |
download | FreeBSD-src-6f44a590dad07c47cdc2fc19861574af1da36963.zip FreeBSD-src-6f44a590dad07c47cdc2fc19861574af1da36963.tar.gz |
Upgrade our copies of clang and llvm to 3.7.1 release. This is a
bugfix-only release, with no new features.
Please note that from 3.5.0 onwards, clang and llvm require C++11
support to build; see UPDATING for more information.
Diffstat (limited to 'contrib')
53 files changed, 518 insertions, 448 deletions
diff --git a/contrib/llvm/include/llvm-c/Core.h b/contrib/llvm/include/llvm-c/Core.h index 1529007..9dbcbfe 100644 --- a/contrib/llvm/include/llvm-c/Core.h +++ b/contrib/llvm/include/llvm-c/Core.h @@ -2675,7 +2675,8 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef, LLVMValueRef Fn, LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch, const char *Name); LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty, - unsigned NumClauses, const char *Name); + LLVMValueRef PersFn, unsigned NumClauses, + const char *Name); LLVMValueRef LLVMBuildResume(LLVMBuilderRef B, LLVMValueRef Exn); LLVMValueRef LLVMBuildUnreachable(LLVMBuilderRef); diff --git a/contrib/llvm/include/llvm/CodeGen/CommandFlags.h b/contrib/llvm/include/llvm/CodeGen/CommandFlags.h index 4b2e0b0..bedb7d5 100644 --- a/contrib/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/contrib/llvm/include/llvm/CodeGen/CommandFlags.h @@ -21,7 +21,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCTargetOptionsCommandFlags.h" -#include "llvm//MC/SubtargetFeature.h" +#include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Host.h" diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index 71c7781..a2b9316 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -169,7 +169,7 @@ void WinException::endFunction(const MachineFunction *MF) { Asm->OutStreamer->PopSection(); } - if (shouldEmitMoves) + if (shouldEmitMoves || shouldEmitPersonality) Asm->OutStreamer->EmitWinCFIEndProc(); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 21ab072..fbc8f1e 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -439,7 +439,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, ISD::ANY_EXTEND, dl, VT, Result); ValResult = Result; - ChainResult = Chain; + ChainResult = newLoad.getValue(1); return; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index a7392fa..54cfaf5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1010,6 +1010,8 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, // Calculate the element offset and add it to the pointer. unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. + assert(EltSize * 8 == EltVT.getSizeInBits() && + "Converting bits to bytes lost precision"); Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, DAG.getConstant(EltSize, dl, Index.getValueType())); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 4348ab7..51cd661 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1528,9 +1528,25 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { if (CustomLowerNode(N, N->getValueType(0), true)) return SDValue(); - // Store the vector to the stack. - EVT EltVT = VecVT.getVectorElementType(); + // Make the vector elements byte-addressable if they aren't already. SDLoc dl(N); + EVT EltVT = VecVT.getVectorElementType(); + if (EltVT.getSizeInBits() < 8) { + SmallVector<SDValue, 4> ElementOps; + for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i) { + ElementOps.push_back(DAG.getAnyExtOrTrunc( + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec, + DAG.getConstant(i, dl, MVT::i8)), + dl, MVT::i8)); + } + + EltVT = MVT::i8; + VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + VecVT.getVectorNumElements()); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, ElementOps); + } + + // Store the vector to the stack. SDValue StackPtr = DAG.CreateStackTemporary(VecVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo(), false, false, 0); diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp index adc620d..b553f11 100644 --- a/contrib/llvm/lib/IR/AsmWriter.cpp +++ b/contrib/llvm/lib/IR/AsmWriter.cpp @@ -794,6 +794,10 @@ void SlotTracker::processFunction() { ST_DEBUG("begin processFunction!\n"); fNext = 0; + // Process function metadata if it wasn't hit at the module-level. + if (!ShouldInitializeAllMetadata) + processFunctionMetadata(*TheFunction); + // Add all the function arguments with no names. for(Function::const_arg_iterator AI = TheFunction->arg_begin(), AE = TheFunction->arg_end(); AI != AE; ++AI) @@ -807,8 +811,6 @@ void SlotTracker::processFunction() { if (!BB.hasName()) CreateFunctionSlot(&BB); - processFunctionMetadata(*TheFunction); - for (auto &I : BB) { if (!I.getType()->isVoidTy() && !I.hasName()) CreateFunctionSlot(&I); @@ -836,11 +838,11 @@ void SlotTracker::processFunction() { void SlotTracker::processFunctionMetadata(const Function &F) { SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; - for (auto &BB : F) { - F.getAllMetadata(MDs); - for (auto &MD : MDs) - CreateMetadataSlot(MD.second); + F.getAllMetadata(MDs); + for (auto &MD : MDs) + CreateMetadataSlot(MD.second); + for (auto &BB : F) { for (auto &I : BB) processInstructionMetadata(I); } diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp index e0e729d..0eb88a9 100644 --- a/contrib/llvm/lib/IR/Core.cpp +++ b/contrib/llvm/lib/IR/Core.cpp @@ -2257,7 +2257,14 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn, } LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty, - unsigned NumClauses, const char *Name) { + LLVMValueRef PersFn, unsigned NumClauses, + const char *Name) { + // The personality used to live on the landingpad instruction, but now it + // lives on the parent function. For compatibility, take the provided + // personality and put it on the parent function. + if (PersFn) + unwrap(B)->GetInsertBlock()->getParent()->setPersonalityFn( + cast<Function>(unwrap(PersFn))); return wrap(unwrap(B)->CreateLandingPad(unwrap(Ty), NumClauses, Name)); } diff --git a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp index 149ec6a..25ae4ac 100644 --- a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -63,14 +63,21 @@ const char* LTOCodeGenerator::getVersionString() { #endif } +static void handleLTODiagnostic(const DiagnosticInfo &DI) { + DiagnosticPrinterRawOStream DP(errs()); + DI.print(DP); + errs() << "\n"; +} + LTOCodeGenerator::LTOCodeGenerator() - : Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context)) { + : Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context), + handleLTODiagnostic) { initializeLTOPasses(); } LTOCodeGenerator::LTOCodeGenerator(std::unique_ptr<LLVMContext> Context) : OwnedContext(std::move(Context)), Context(*OwnedContext), - IRLinker(new Module("ld-temp.o", *OwnedContext)) { + IRLinker(new Module("ld-temp.o", *OwnedContext), handleLTODiagnostic) { initializeLTOPasses(); } diff --git a/contrib/llvm/lib/MC/MCContext.cpp b/contrib/llvm/lib/MC/MCContext.cpp index c601c56..a85796c 100644 --- a/contrib/llvm/lib/MC/MCContext.cpp +++ b/contrib/llvm/lib/MC/MCContext.cpp @@ -82,6 +82,7 @@ void MCContext::reset() { UsedNames.clear(); Symbols.clear(); + SectionSymbols.clear(); Allocator.Reset(); Instances.clear(); CompilationDir.clear(); diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 709d753..0a5309b 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -264,6 +264,12 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, for (const MachineBasicBlock &MBB : MF) { for (const MachineInstr &MI : MBB) { // TODO: CodeSize should account for multiple functions. + + // TODO: Should we count size of debug info? + if (MI.isDebugValue()) + continue; + + // FIXME: This is reporting 0 for many instructions. CodeSize += MI.getDesc().Size; unsigned numOperands = MI.getNumOperands(); diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 4a65bfc..57b7a73 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -134,13 +134,17 @@ static Value* GEPToVectorIndex(GetElementPtrInst *GEP) { // // TODO: Check isTriviallyVectorizable for calls and handle other // instructions. -static bool canVectorizeInst(Instruction *Inst) { +static bool canVectorizeInst(Instruction *Inst, User *User) { switch (Inst->getOpcode()) { case Instruction::Load: - case Instruction::Store: case Instruction::BitCast: case Instruction::AddrSpaceCast: return true; + case Instruction::Store: { + // Must be the stored pointer operand, not a stored value. + StoreInst *SI = cast<StoreInst>(Inst); + return SI->getPointerOperand() == User; + } default: return false; } @@ -166,7 +170,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) { for (User *AllocaUser : Alloca->users()) { GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser); if (!GEP) { - if (!canVectorizeInst(cast<Instruction>(AllocaUser))) + if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca)) return false; WorkList.push_back(AllocaUser); @@ -184,7 +188,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) { GEPVectorIdx[GEP] = Index; for (User *GEPUser : AllocaUser->users()) { - if (!canVectorizeInst(cast<Instruction>(GEPUser))) + if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser)) return false; WorkList.push_back(GEPUser); @@ -240,7 +244,12 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) { for (User *User : Val->users()) { if(std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end()) continue; - if (isa<CallInst>(User)) { + if (CallInst *CI = dyn_cast<CallInst>(User)) { + // TODO: We might be able to handle some cases where the callee is a + // constantexpr bitcast of a function. + if (!CI->getCalledFunction()) + return false; + WorkList.push_back(User); continue; } @@ -250,6 +259,12 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) { if (UseInst && UseInst->getOpcode() == Instruction::PtrToInt) return false; + if (StoreInst *SI = dyn_cast_or_null<StoreInst>(UseInst)) { + // Reject if the stored value is not the pointer operand. + if (SI->getPointerOperand() != Val) + return false; + } + if (!User->getType()->isPointerTy()) continue; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td index 835a146..ba0490a 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td @@ -14,8 +14,7 @@ let Namespace = "AMDGPU" in { foreach Index = 0-15 in { - // Indices are used in a variety of ways here, so don't set a size/offset. - def sub#Index : SubRegIndex<-1, -1>; + def sub#Index : SubRegIndex<32, !shl(Index, 5)>; } def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">; diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 468563c..4434d9b 100644 --- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -71,12 +71,26 @@ void AMDGPUMCObjectWriter::writeObject(MCAssembler &Asm, } } +static unsigned getFixupKindNumBytes(unsigned Kind) { + switch (Kind) { + case FK_Data_1: + return 1; + case FK_Data_2: + return 2; + case FK_Data_4: + return 4; + case FK_Data_8: + return 8; + default: + llvm_unreachable("Unknown fixup kind!"); + } +} + void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const { switch ((unsigned)Fixup.getKind()) { - default: llvm_unreachable("Unknown fixup kind"); case AMDGPU::fixup_si_sopp_br: { uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset()); *Dst = (Value - 4) / 4; @@ -96,6 +110,24 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, *Dst = Value + 4; break; } + default: { + // FIXME: Copied from AArch64 + unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); + if (!Value) + return; // Doesn't change encoding. + MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind()); + + // Shift the value into position. + Value <<= Info.TargetOffset; + + unsigned Offset = Fixup.getOffset(); + assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + + // For each byte of the fragment that the fixup touches, mask in the + // bits from the fixup value. + for (unsigned i = 0; i != NumBytes; ++i) + Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); + } } } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 099b0b1..c2db9ff 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -157,6 +157,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, setTruncStoreAction(MVT::i64, MVT::i32, Expand); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); + setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); setOperationAction(ISD::LOAD, MVT::i1, Custom); @@ -2252,10 +2253,8 @@ MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG, SDValue Ptr) const { const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); - uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE | - 0xffffffff; // Size - return buildRSRC(DAG, DL, Ptr, 0, Rsrc); + return buildRSRC(DAG, DL, Ptr, 0, TII->getScratchRsrcWords23()); } SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG, diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 1891061..cfd2c42 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2778,3 +2778,16 @@ uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { return RsrcDataFormat; } + +uint64_t SIInstrInfo::getScratchRsrcWords23() const { + uint64_t Rsrc23 = getDefaultRsrcDataFormat() | + AMDGPU::RSRC_TID_ENABLE | + 0xffffffff; // Size; + + // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17]. + // Clear them unless we want a huge stride. + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT; + + return Rsrc23; +} diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 015ea12..5053786 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -353,7 +353,7 @@ public: } uint64_t getDefaultRsrcDataFormat() const; - + uint64_t getScratchRsrcWords23() const; }; namespace AMDGPU { diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td index f78ffd7..e0eeea9 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1548,6 +1548,12 @@ defm V_WRITELANE_B32 : VOP2SI_3VI_m < // These instructions only exist on SI and CI let SubtargetPredicate = isSICI in { +let isCommutable = 1 in { +defm V_MAC_LEGACY_F32 : VOP2InstSI <vop2<0x6>, "v_mac_legacy_f32", + VOP_F32_F32_F32 +>; +} // End isCommutable = 1 + defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy >; @@ -1562,12 +1568,6 @@ defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32>; } // End isCommutable = 1 } // End let SubtargetPredicate = SICI -let isCommutable = 1 in { -defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32", - VOP_F32_F32_F32 ->; -} // End isCommutable = 1 - defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", VOP_I32_I32_I32 >; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp b/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp index 0a7f684..2cd600d 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp @@ -135,8 +135,7 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { unsigned ScratchRsrcReg = RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0); - uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE | - 0xffffffff; // Size + uint64_t Rsrc23 = TII->getScratchRsrcWords23(); unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); @@ -152,11 +151,11 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { .addReg(ScratchRsrcReg, RegState::ImplicitDefine); BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2) - .addImm(Rsrc & 0xffffffff) + .addImm(Rsrc23 & 0xffffffff) .addReg(ScratchRsrcReg, RegState::ImplicitDefine); BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3) - .addImm(Rsrc >> 32) + .addImm(Rsrc23 >> 32) .addReg(ScratchRsrcReg, RegState::ImplicitDefine); // Scratch Offset diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 54c4d54..e9e8412 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -26,23 +26,25 @@ using namespace llvm; SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {} -BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { - BitVector Reserved(getNumRegs()); - Reserved.set(AMDGPU::EXEC); +void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const { + MCRegAliasIterator R(Reg, this, true); - // EXEC_LO and EXEC_HI could be allocated and used as regular register, - // but this seems likely to result in bugs, so I'm marking them as reserved. - Reserved.set(AMDGPU::EXEC_LO); - Reserved.set(AMDGPU::EXEC_HI); + for (; R.isValid(); ++R) + Reserved.set(*R); +} +BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); Reserved.set(AMDGPU::INDIRECT_BASE_ADDR); - Reserved.set(AMDGPU::FLAT_SCR); - Reserved.set(AMDGPU::FLAT_SCR_LO); - Reserved.set(AMDGPU::FLAT_SCR_HI); + + // EXEC_LO and EXEC_HI could be allocated and used as regular register, but + // this seems likely to result in bugs, so I'm marking them as reserved. + reserveRegisterTuples(Reserved, AMDGPU::EXEC); + reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR); // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs - Reserved.set(AMDGPU::VGPR255); - Reserved.set(AMDGPU::VGPR254); + reserveRegisterTuples(Reserved, AMDGPU::VGPR254); + reserveRegisterTuples(Reserved, AMDGPU::VGPR255); // Tonga and Iceland can only allocate a fixed number of SGPRs due // to a hw bug. @@ -54,10 +56,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { for (unsigned i = Limit; i < NumSGPRs; ++i) { unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); - MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true); - - for (; R.isValid(); ++R) - Reserved.set(*R); + reserveRegisterTuples(Reserved, Reg); } } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index bfdb67c..7da6de2 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -23,7 +23,10 @@ namespace llvm { struct SIRegisterInfo : public AMDGPURegisterInfo { +private: + void reserveRegisterTuples(BitVector &, unsigned Reg) const; +public: SIRegisterInfo(); BitVector getReservedRegs(const MachineFunction &MF) const override; diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index f8f0eb2..cf6b892 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" @@ -9104,6 +9105,10 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) { return false; } + Triple T; + STI.setDefaultFeatures(T.getARMCPUForArch(Arch)); + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + getTargetStreamer().emitArch(ID); return false; } diff --git a/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp index d9e654c..9d5f1d4 100644 --- a/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -50,6 +50,7 @@ private: // Complex Pattern for address selection. bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset); + bool SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset); }; } @@ -67,7 +68,7 @@ bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { Addr.getOpcode() == ISD::TargetGlobalAddress) return false; - // Addresses of the form FI+const or FI|const + // Addresses of the form Addr+const or Addr|const if (CurDAG->isBaseWithConstantOffset(Addr)) { ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); if (isInt<32>(CN->getSExtValue())) { @@ -89,6 +90,31 @@ bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { return true; } +// ComplexPattern used on BPF FI instruction +bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { + SDLoc DL(Addr); + + if (!CurDAG->isBaseWithConstantOffset(Addr)) + return false; + + // Addresses of the form Addr+const or Addr|const + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + if (isInt<32>(CN->getSExtValue())) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = + dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); + else + return false; + + Offset = CurDAG->getTargetConstant(CN->getSExtValue(), DL, MVT::i64); + return true; + } + + return false; +} + SDNode *BPFDAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); @@ -104,13 +130,6 @@ SDNode *BPFDAGToDAGISel::Select(SDNode *Node) { // tablegen selection should be handled here. switch (Opcode) { default: break; - - case ISD::UNDEF: { - errs() << "BUG: "; Node->dump(CurDAG); errs() << '\n'; - report_fatal_error("shouldn't see UNDEF during Select"); - break; - } - case ISD::INTRINSIC_W_CHAIN: { unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); switch (IntNo) { diff --git a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp index 58498a1..7341828 100644 --- a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -102,6 +102,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BR_CC, MVT::i64, Custom); setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BRIND, MVT::Other, Expand); setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::SETCC, MVT::i64, Expand); setOperationAction(ISD::SELECT, MVT::i64, Expand); @@ -128,9 +129,6 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SUBC, MVT::i64, Expand); setOperationAction(ISD::SUBE, MVT::i64, Expand); - // no UNDEF allowed - setOperationAction(ISD::UNDEF, MVT::i64, Expand); - setOperationAction(ISD::ROTR, MVT::i64, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); diff --git a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td index 26b2cfe..6b73db8 100644 --- a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -54,7 +54,8 @@ def i64immSExt32 : PatLeaf<(imm), [{return isInt<32>(N->getSExtValue()); }]>; // Addressing modes. -def ADDRri : ComplexPattern<i64, 2, "SelectAddr", [frameindex], []>; +def ADDRri : ComplexPattern<i64, 2, "SelectAddr", [], []>; +def FIri : ComplexPattern<i64, 2, "SelectFIAddr", [add, or], []>; // Address operands def MEMri : Operand<i64> { @@ -260,6 +261,15 @@ def MOV_rr : MOV_RR<"mov">; def MOV_ri : MOV_RI<"mov">; } +def FI_ri + : InstBPF<(outs GPR:$dst), (ins MEMri:$addr), + "lea\t$dst, $addr", + [(set i64:$dst, FIri:$addr)]> { + // This is a tentative instruction, and will be replaced + // with MOV_rr and ADD_ri in PEI phase +} + + def LD_pseudo : InstBPF<(outs GPR:$dst), (ins i64imm:$pseudo, u64imm:$imm), "ld_pseudo\t$dst, $pseudo, $imm", diff --git a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp index 8f885c3..952615b 100644 --- a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp @@ -58,14 +58,13 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned FrameReg = getFrameRegister(MF); int FrameIndex = MI.getOperand(i).getIndex(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + MachineBasicBlock &MBB = *MI.getParent(); if (MI.getOpcode() == BPF::MOV_rr) { - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex); MI.getOperand(i).ChangeToRegister(FrameReg, false); - - MachineBasicBlock &MBB = *MI.getParent(); unsigned reg = MI.getOperand(i - 1).getReg(); BuildMI(MBB, ++II, DL, TII.get(BPF::ADD_ri), reg) .addReg(reg) @@ -79,8 +78,24 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (!isInt<32>(Offset)) llvm_unreachable("bug in frame offset"); - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i + 1).ChangeToImmediate(Offset); + if (MI.getOpcode() == BPF::FI_ri) { + // architecture does not really support FI_ri, replace it with + // MOV_rr <target_reg>, frame_reg + // ADD_ri <target_reg>, imm + unsigned reg = MI.getOperand(i - 1).getReg(); + + BuildMI(MBB, ++II, DL, TII.get(BPF::MOV_rr), reg) + .addReg(FrameReg); + BuildMI(MBB, II, DL, TII.get(BPF::ADD_ri), reg) + .addReg(reg) + .addImm(Offset); + + // Remove FI_ri instruction + MI.eraseFromParent(); + } else { + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i + 1).ChangeToImmediate(Offset); + } } unsigned BPFRegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h index 6fe8f83..b3d861d 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h @@ -269,6 +269,14 @@ namespace llvm { unsigned getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const override; + /// Returns true if a cast between SrcAS and DestAS is a noop. + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { + // Mips doesn't have any special address spaces so we just reserve + // the first 256 for software use (e.g. OpenCL) and treat casts + // between them as noops. + return SrcAS < 256 && DestAS < 256; + } + protected: SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const; diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index cb46d73..2ebfbd1 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -115,6 +115,11 @@ bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI, if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo()) continue; + // Also, we have to check that the register class of the operand + // contains the zero register. + if (!MRI->getRegClass(MO.getReg())->contains(ZeroReg)) + continue; + MO.setReg(ZeroReg); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 4444466..8e118ec 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -947,11 +947,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::ADDISdtprelHA: - // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym> - // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha + // Transform: %Xd = ADDISdtprelHA %Xs, <ga:@sym> + // Into: %Xd = ADDIS8 %Xs, sym@dtprel@ha case PPC::ADDISdtprelHA32: { - // Transform: %Rd = ADDISdtprelHA32 %R3, <ga:@sym> - // Into: %Rd = ADDIS %R3, sym@dtprel@ha + // Transform: %Rd = ADDISdtprelHA32 %Rs, <ga:@sym> + // Into: %Rd = ADDIS %Rs, sym@dtprel@ha const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -962,7 +962,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { *OutStreamer, MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDIS8 : PPC::ADDIS) .addReg(MI->getOperand(0).getReg()) - .addReg(Subtarget->isPPC64() ? PPC::X3 : PPC::R3) + .addReg(MI->getOperand(1).getReg()) .addExpr(SymDtprel)); return; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp index baadf08..fd150be 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -197,10 +197,18 @@ static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { // Determining the address of a TLS variable results in a function call in // certain TLS models. static bool memAddrUsesCTR(const PPCTargetMachine *TM, - const llvm::Value *MemAddr) { + const Value *MemAddr) { const auto *GV = dyn_cast<GlobalValue>(MemAddr); - if (!GV) + if (!GV) { + // Recurse to check for constants that refer to TLS global variables. + if (const auto *CV = dyn_cast<Constant>(MemAddr)) + for (const auto &CO : CV->operands()) + if (memAddrUsesCTR(TM, CO)) + return true; + return false; + } + if (!GV->isThreadLocal()) return false; if (!TM) @@ -239,6 +247,11 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { switch (F->getIntrinsicID()) { default: continue; + // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr + // we're definitely using CTR. + case Intrinsic::ppc_is_decremented_ctr_nonzero: + case Intrinsic::ppc_mtctr: + return true; // VisualStudio defines setjmp as _setjmp #if defined(_MSC_VER) && defined(setjmp) && \ @@ -426,6 +439,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { // Process nested loops first. for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { MadeChange |= convertToCTRLoop(*I); + DEBUG(dbgs() << "Nested loop converted\n"); } // If a nested loop has been converted, then we can't convert this loop. diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index b6025bf..9322268 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -2570,13 +2570,25 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return nullptr; } // ISD::OR doesn't get all the bitfield insertion fun. - // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert + // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a + // bitfield insert. if (isInt32Immediate(N->getOperand(1), Imm) && N->getOperand(0).getOpcode() == ISD::OR && isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) { + // The idea here is to check whether this is equivalent to: + // (c1 & m) | (x & ~m) + // where m is a run-of-ones mask. The logic here is that, for each bit in + // c1 and c2: + // - if both are 1, then the output will be 1. + // - if both are 0, then the output will be 0. + // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will + // come from x. + // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will + // be 0. + // If that last condition is never the case, then we can form m from the + // bits that are the same between c1 and c2. unsigned MB, ME; - Imm = ~(Imm^Imm2); - if (isRunOfOnes(Imm, MB, ME)) { + if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) { SDValue Ops[] = { N->getOperand(0).getOperand(0), N->getOperand(0).getOperand(1), getI32Imm(0, dl), getI32Imm(MB, dl), @@ -2787,6 +2799,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Base, Offset; if (LD->isUnindexed() && + (LD->getMemoryVT() == MVT::f64 || + LD->getMemoryVT() == MVT::i64) && SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { SDValue Chain = LD->getChain(); SDValue Ops[] = { Base, Offset, Chain }; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 1e28913..1b8f8fb 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -431,6 +431,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); setOperationAction(ISD::SELECT, VT, Promote); AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); + setOperationAction(ISD::SELECT_CC, VT, Promote); + AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32); setOperationAction(ISD::STORE, VT, Promote); AddPromotedToType (ISD::STORE, VT, MVT::v4i32); @@ -7175,7 +7177,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, PPC::isSplatShuffleMask(SVOp, 4) || PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || - PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) || PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || @@ -7183,8 +7184,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) || - PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || - PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)) { + (Subtarget.hasP8Altivec() && ( + PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) { return Op; } } @@ -7195,7 +7198,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, unsigned int ShuffleKind = isLittleEndian ? 2 : 0; if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || - PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) || PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || @@ -7203,8 +7205,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) || - PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || - PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)) + (Subtarget.hasP8Altivec() && ( + PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)))) return Op; // Check to see if this is a shuffle of 4-byte values. If so, we can use our diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index bf6e402..d4e666c 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -309,6 +309,11 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { unsigned MB = MI->getOperand(4).getImm(); unsigned ME = MI->getOperand(5).getImm(); + // We can't commute a trivial mask (there is no way to represent an all-zero + // mask). + if (MB == 0 && ME == 31) + return nullptr; + if (NewMI) { // Create a new instruction. unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg(); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td index b50124d..24fd9bd 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -2835,24 +2835,84 @@ def : Pat<(i64 (anyext i1:$in)), (SELECT_I8 $in, (LI8 1), (LI8 0))>; // match setcc on i1 variables. +// CRANDC is: +// 1 1 : F +// 1 0 : T +// 0 1 : F +// 0 0 : F +// +// LT is: +// -1 -1 : F +// -1 0 : T +// 0 -1 : F +// 0 0 : F +// +// ULT is: +// 1 1 : F +// 1 0 : F +// 0 1 : T +// 0 0 : F def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLT)), - (CRANDC $s2, $s1)>; + (CRANDC $s1, $s2)>; def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULT)), (CRANDC $s2, $s1)>; +// CRORC is: +// 1 1 : T +// 1 0 : T +// 0 1 : F +// 0 0 : T +// +// LE is: +// -1 -1 : T +// -1 0 : T +// 0 -1 : F +// 0 0 : T +// +// ULE is: +// 1 1 : T +// 1 0 : F +// 0 1 : T +// 0 0 : T def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLE)), - (CRORC $s2, $s1)>; + (CRORC $s1, $s2)>; def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULE)), (CRORC $s2, $s1)>; + def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETEQ)), (CREQV $s1, $s2)>; + +// GE is: +// -1 -1 : T +// -1 0 : F +// 0 -1 : T +// 0 0 : T +// +// UGE is: +// 1 1 : T +// 1 0 : T +// 0 1 : F +// 0 0 : T def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGE)), - (CRORC $s1, $s2)>; + (CRORC $s2, $s1)>; def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGE)), (CRORC $s1, $s2)>; + +// GT is: +// -1 -1 : F +// -1 0 : F +// 0 -1 : T +// 0 0 : F +// +// UGT is: +// 1 1 : F +// 1 0 : T +// 0 1 : F +// 0 0 : F def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGT)), - (CRANDC $s1, $s2)>; + (CRANDC $s2, $s1)>; def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGT)), (CRANDC $s1, $s2)>; + def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETNE)), (CRXOR $s1, $s2)>; @@ -3203,18 +3263,30 @@ def : Pat<(i1 (select i1:$cond, i1:$tval, i1:$fval)), // select (lhs == rhs), tval, fval is: // ((lhs == rhs) & tval) | (!(lhs == rhs) & fval) def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLT)), + (CROR (CRAND (CRANDC $lhs, $rhs), $tval), + (CRAND (CRORC $rhs, $lhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETULT)), (CROR (CRAND (CRANDC $rhs, $lhs), $tval), (CRAND (CRORC $lhs, $rhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLE)), + (CROR (CRAND (CRORC $lhs, $rhs), $tval), + (CRAND (CRANDC $rhs, $lhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETULE)), (CROR (CRAND (CRORC $rhs, $lhs), $tval), (CRAND (CRANDC $lhs, $rhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETEQ)), (CROR (CRAND (CREQV $lhs, $rhs), $tval), (CRAND (CRXOR $lhs, $rhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGE)), + (CROR (CRAND (CRORC $rhs, $lhs), $tval), + (CRAND (CRANDC $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETUGE)), (CROR (CRAND (CRORC $lhs, $rhs), $tval), (CRAND (CRANDC $rhs, $lhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGT)), + (CROR (CRAND (CRANDC $rhs, $lhs), $tval), + (CRAND (CRORC $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETUGT)), (CROR (CRAND (CRANDC $lhs, $rhs), $tval), (CRAND (CRORC $rhs, $lhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)), @@ -3223,66 +3295,106 @@ def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)), // match selectcc on i1 variables with non-i1 output. def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLT)), + (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETULT)), (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLE)), + (SELECT_I4 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETULE)), (SELECT_I4 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETEQ)), (SELECT_I4 (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGE)), + (SELECT_I4 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETUGE)), (SELECT_I4 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGT)), + (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETUGT)), (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETNE)), (SELECT_I4 (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLT)), + (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETULT)), (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLE)), + (SELECT_I8 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETULE)), (SELECT_I8 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETEQ)), (SELECT_I8 (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGE)), + (SELECT_I8 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETUGE)), (SELECT_I8 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGT)), + (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETUGT)), (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETNE)), (SELECT_I8 (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), + (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)), (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), + (SELECT_F4 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)), (SELECT_F4 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)), (SELECT_F4 (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), + (SELECT_F4 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)), (SELECT_F4 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), + (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)), (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), (SELECT_F4 (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), + (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)), (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)), + (SELECT_F8 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)), (SELECT_F8 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)), (SELECT_F8 (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)), + (SELECT_F8 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)), (SELECT_F8 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)), + (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)), (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), (SELECT_F8 (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLT)), + (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETULT)), (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLE)), + (SELECT_VRRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETULE)), (SELECT_VRRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETEQ)), (SELECT_VRRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGE)), + (SELECT_VRRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGE)), (SELECT_VRRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGT)), + (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGT)), (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)), (SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td index 5c66b42..0a044c5 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td @@ -1115,40 +1115,64 @@ def : Pat<(v4f64 (PPCqbflt v4i1:$src)), (COPY_TO_REGCLASS $src, QFRC)>; def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLT)), + (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULT)), (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLE)), + (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULE)), (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETEQ)), (SELECT_QFRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGE)), + (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGE)), (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGT)), + (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGT)), (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETNE)), (SELECT_QFRC (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLT)), + (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULT)), (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLE)), + (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULE)), (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETEQ)), (SELECT_QSRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGE)), + (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGE)), (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGT)), + (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGT)), (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETNE)), (SELECT_QSRC (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLT)), + (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULT)), (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLE)), + (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULE)), (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETEQ)), (SELECT_QBRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGE)), + (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGE)), (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGT)), + (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGT)), (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETNE)), (SELECT_QBRC (CRXOR $lhs, $rhs), $tval, $fval)>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 20c95fe..ce63c22 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -958,27 +958,43 @@ def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>; // Selects. def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)), + (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULT)), (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)), + (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULE)), (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)), (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)), + (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGE)), (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)), + (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGT)), (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)), (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), + (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)), (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)), + (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)), (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)), (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)), + (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)), (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)), + (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)), (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>; @@ -1060,18 +1076,27 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>; def : Pat<(f64 (fextend f32:$src)), (COPY_TO_REGCLASS $src, VSFRC)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), + (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)), (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), + (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)), (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)), (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), + (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)), (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), + (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)), (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), - (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>; + (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>; // VSX Elementary Scalar FP arithmetic (SP) let isCommutable = 1 in { diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index 58d3c3d..46b8d13 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -103,6 +103,11 @@ protected: VNInfo *AddendValNo = LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn(); + if (!AddendValNo) { + // This can be null if the register is undef. + continue; + } + MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def); // The addend and this instruction must be in the same block. @@ -181,11 +186,14 @@ protected: if (!KilledProdOp) continue; - // For virtual registers, verify that the addend source register - // is live here (as should have been assured above). - assert((!TargetRegisterInfo::isVirtualRegister(AddendSrcReg) || - LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) && - "Addend source register is not live!"); + // If the addend copy is used only by this MI, then the addend source + // register is likely not live here. This could be fixed (based on the + // legality checks above, the live range for the addend source register + // could be extended), but it seems likely that such a trivial copy can + // be coalesced away later, and thus is not worth the effort. + if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) && + !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) + continue; // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3. diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 3fb1dcc..d7132d5 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -240,6 +240,9 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { for (MachineBasicBlock &MBB : *MF) { for (MachineInstr &MI : MBB) { + if (MI.isDebugValue()) + continue; + bool RelevantInstr = false; bool Partial = false; diff --git a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index 4a33f7f..1c4e486 100644 --- a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -77,7 +77,7 @@ class SparcAsmParser : public MCTargetAsmParser { bool parseDirectiveWord(unsigned Size, SMLoc L); bool is64Bit() const { - return STI.getTargetTriple().getArchName().startswith("sparcv9"); + return STI.getTargetTriple().getArch() == Triple::sparcv9; } void expandSET(MCInst &Inst, SMLoc IDLoc, diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 88e5e47..909baae 100644 --- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -228,7 +228,7 @@ void PassManagerBuilder::populateModulePassManager( // Start of function pass. // Break up aggregate allocas, using SSAUpdater. if (UseNewSROA) - MPM.add(createSROAPass(/*RequiresDomTree*/ false)); + MPM.add(createSROAPass()); else MPM.add(createScalarReplAggregatesPass(-1, false)); MPM.add(createEarlyCSEPass()); // Catch trivial redundancies diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp index d1eba6e..89a0d0a 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1761,7 +1761,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { if (isa<PHINode>(V)) V->takeName(LI); if (Instruction *I = dyn_cast<Instruction>(V)) - I->setDebugLoc(LI->getDebugLoc()); + if (LI->getDebugLoc()) + I->setDebugLoc(LI->getDebugLoc()); if (V->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); markInstructionForDeletion(LI); diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index 50ca623..ba8af47 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -869,6 +869,11 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { PN->replaceAllUsesWith(*Inserted.first); PN->eraseFromParent(); Changed = true; + + // The RAUW can change PHIs that we already visited. Start over from the + // beginning. + PHISet.clear(); + I = BB->begin(); } } diff --git a/contrib/llvm/patches/README.TXT b/contrib/llvm/patches/README.TXT index 220baf0..96112b6 100644 --- a/contrib/llvm/patches/README.TXT +++ b/contrib/llvm/patches/README.TXT @@ -1,11 +1,11 @@ This is a set of individual patches, which contain all the customizations to llvm/clang currently in the FreeBSD base system. These can be applied in -alphabetical order to a pristine llvm/clang 3.7.0 source tree, for example by +alphabetical order to a pristine llvm/clang 3.7.1 source tree, for example by doing: -svn co https://llvm.org/svn/llvm-project/llvm/trunk llvm-3.7.0 -svn co https://llvm.org/svn/llvm-project/cfe/trunk llvm-3.7.0/tools/clang -cd llvm-3.7.0 +svn co https://llvm.org/svn/llvm-project/llvm/trunk llvm-3.7.1 +svn co https://llvm.org/svn/llvm-project/cfe/trunk llvm-3.7.1/tools/clang +cd llvm-3.7.1 for p in /usr/src/contrib/llvm/patches/patch-*.diff; do patch -p0 -f -F0 -E -i $p -s || break done diff --git a/contrib/llvm/patches/patch-10-clang-cc1as-dwarf2.diff b/contrib/llvm/patches/patch-08-clang-cc1as-dwarf2.diff index eb00168..eb00168 100644 --- a/contrib/llvm/patches/patch-10-clang-cc1as-dwarf2.diff +++ b/contrib/llvm/patches/patch-08-clang-cc1as-dwarf2.diff diff --git a/contrib/llvm/patches/patch-08-llvm-r250085-fix-avx-crash.diff b/contrib/llvm/patches/patch-08-llvm-r250085-fix-avx-crash.diff deleted file mode 100644 index a83c68e..0000000 --- a/contrib/llvm/patches/patch-08-llvm-r250085-fix-avx-crash.diff +++ /dev/null @@ -1,142 +0,0 @@ -Pull in r250085 from upstream llvm trunk (by Andrea Di Biagio): - - [x86] Fix wrong lowering of vsetcc nodes (PR25080). - - Function LowerVSETCC (in X86ISelLowering.cpp) worked under the wrong - assumption that for non-AVX512 targets, the source type and destination type - of a type-legalized setcc node were always the same type. - - This assumption was unfortunately incorrect; the type legalizer is not always - able to promote the return type of a setcc to the same type as the first - operand of a setcc. - - In the case of a vsetcc node, the legalizer firstly checks if the first input - operand has a legal type. If so, then it promotes the return type of the vsetcc - to that same type. Otherwise, the return type is promoted to the 'next legal - type', which, for vectors of MVT::i1 is always a 128-bit integer vector type. - - Example (-mattr=+avx): - - %0 = trunc <8 x i32> %a to <8 x i23> - %1 = icmp eq <8 x i23> %0, zeroinitializer - - The initial selection dag for the code above is: - - v8i1 = setcc t5, t7, seteq:ch - t5: v8i23 = truncate t2 - t2: v8i32,ch = CopyFromReg t0, Register:v8i32 %vreg1 - t7: v8i32 = build_vector of all zeroes. - - The type legalizer would firstly check if 't5' has a legal type. If so, then it - would reuse that same type to promote the return type of the setcc node. - Unfortunately 't5' is of illegal type v8i23, and therefore it cannot be used to - promote the return type of the setcc node. Consequently, the setcc return type - is promoted to v8i16. Later on, 't5' is promoted to v8i32 thus leading to the - following dag node: - v8i16 = setcc t32, t25, seteq:ch - - where t32 and t25 are now values of type v8i32. - - Before this patch, function LowerVSETCC would have wrongly expanded the setcc - to a single X86ISD::PCMPEQ. Surprisingly, ISel was still able to match an - instruction. In our case, ISel would have matched a VPCMPEQWrr: - t37: v8i16 = X86ISD::VPCMPEQWrr t36, t25 - - However, t36 and t25 are both VR256, while the result type is instead of class - VR128. This inconsistency ended up causing the insertion of COPY instructions - like this: - %vreg7<def> = COPY %vreg3; VR128:%vreg7 VR256:%vreg3 - - Which is an invalid full copy (not a sub register copy). - Eventually, the backend would have hit an UNREACHABLE "Cannot emit physreg copy - instruction" in the attempt to expand the malformed pseudo COPY instructions. - - This patch fixes the problem adding the missing logic in LowerVSETCC to handle - the corner case of a setcc with 128-bit return type and 256-bit operand type. - - This problem was originally reported by Dimitry as PR25080. It has been latent - for a very long time. I have added the minimal reproducible from that bugzilla - as test setcc-lowering.ll. - - Differential Revision: http://reviews.llvm.org/D13660 - -This should fix the "Cannot emit physreg copy instruction" errors when -compiling contrib/wpa/src/common/ieee802_11_common.c, and CPUTYPE is set -to a CPU supporting AVX (e.g. sandybridge, ivybridge). - -Introduced here: http://svnweb.freebsd.org/changeset/base/289221 - -Index: lib/Target/X86/X86ISelLowering.cpp -=================================================================== ---- lib/Target/X86/X86ISelLowering.cpp -+++ lib/Target/X86/X86ISelLowering.cpp -@@ -13573,6 +13573,35 @@ static SDValue LowerVSETCC(SDValue Op, const X86Su - DAG.getConstant(SSECC, dl, MVT::i8)); - } - -+ MVT VTOp0 = Op0.getSimpleValueType(); -+ assert(VTOp0 == Op1.getSimpleValueType() && -+ "Expected operands with same type!"); -+ assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() && -+ "Invalid number of packed elements for source and destination!"); -+ -+ if (VT.is128BitVector() && VTOp0.is256BitVector()) { -+ // On non-AVX512 targets, a vector of MVT::i1 is promoted by the type -+ // legalizer to a wider vector type. In the case of 'vsetcc' nodes, the -+ // legalizer firstly checks if the first operand in input to the setcc has -+ // a legal type. If so, then it promotes the return type to that same type. -+ // Otherwise, the return type is promoted to the 'next legal type' which, -+ // for a vector of MVT::i1 is always a 128-bit integer vector type. -+ // -+ // We reach this code only if the following two conditions are met: -+ // 1. Both return type and operand type have been promoted to wider types -+ // by the type legalizer. -+ // 2. The original operand type has been promoted to a 256-bit vector. -+ // -+ // Note that condition 2. only applies for AVX targets. -+ SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, SetCCOpcode); -+ return DAG.getZExtOrTrunc(NewOp, dl, VT); -+ } -+ -+ // The non-AVX512 code below works under the assumption that source and -+ // destination types are the same. -+ assert((Subtarget->hasAVX512() || (VT == VTOp0)) && -+ "Value types for source and destination must be the same!"); -+ - // Break 256-bit integer vector compare into smaller ones. - if (VT.is256BitVector() && !Subtarget->hasInt256()) - return Lower256IntVSETCC(Op, DAG); -Index: test/CodeGen/X86/setcc-lowering.ll -=================================================================== ---- test/CodeGen/X86/setcc-lowering.ll -+++ test/CodeGen/X86/setcc-lowering.ll -@@ -0,0 +1,29 @@ -+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s -+ -+; Verify that we don't crash during codegen due to a wrong lowering -+; of a setcc node with illegal operand types and return type. -+ -+define <8 x i16> @pr25080(<8 x i32> %a) { -+; CHECK-LABEL: pr25080: -+; CHECK: # BB#0: # %entry -+; CHECK-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 -+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 -+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1 -+; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] -+; CHECK-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -+; CHECK-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -+; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -+; CHECK-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 -+; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 -+; CHECK-NEXT: vpsraw $15, %xmm0, %xmm0 -+; CHECK-NEXT: vzeroupper -+; CHECK-NEXT: retq -+entry: -+ %0 = trunc <8 x i32> %a to <8 x i23> -+ %1 = icmp eq <8 x i23> %0, zeroinitializer -+ %2 = or <8 x i1> %1, <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false> -+ %3 = sext <8 x i1> %2 to <8 x i16> -+ ret <8 x i16> %3 -+} diff --git a/contrib/llvm/patches/patch-09-clang-r250657-openmp.diff b/contrib/llvm/patches/patch-09-clang-r250657-openmp.diff deleted file mode 100644 index e784673..0000000 --- a/contrib/llvm/patches/patch-09-clang-r250657-openmp.diff +++ /dev/null @@ -1,182 +0,0 @@ -Pull in r248379 from upstream clang trunk (by Jörg Sonnenberger): - - Refactor library decision for -fopenmp support from Darwin into a - function for sharing with other platforms. - -Pull in r248424 from upstream clang trunk (by Jörg Sonnenberger): - - Push OpenMP linker flags after linker input on Darwin. Don't add any - libraries if -nostdlib is specified. Test. - -Pull in r248426 from upstream clang trunk (by Jörg Sonnenberger): - - Support linking against OpenMP runtime on NetBSD. - -Pull in r250657 from upstream clang trunk (by Dimitry Andric): - - Support linking against OpenMP runtime on FreeBSD. - -Introduced here: http://svnweb.freebsd.org/changeset/base/289523 - -Index: tools/clang/lib/Driver/Tools.cpp -=================================================================== ---- tools/clang/lib/Driver/Tools.cpp -+++ tools/clang/lib/Driver/Tools.cpp -@@ -2460,6 +2460,28 @@ static OpenMPRuntimeKind getOpenMPRuntime(const To - return RT; - } - -+static void addOpenMPRuntime(ArgStringList &CmdArgs, const ToolChain &TC, -+ const ArgList &Args) { -+ if (!Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, -+ options::OPT_fno_openmp, false)) -+ return; -+ -+ switch (getOpenMPRuntime(TC, Args)) { -+ case OMPRT_OMP: -+ CmdArgs.push_back("-lomp"); -+ break; -+ case OMPRT_GOMP: -+ CmdArgs.push_back("-lgomp"); -+ break; -+ case OMPRT_IOMP5: -+ CmdArgs.push_back("-liomp5"); -+ break; -+ case OMPRT_Unknown: -+ // Already diagnosed. -+ break; -+ } -+} -+ - static void addSanitizerRuntime(const ToolChain &TC, const ArgList &Args, - ArgStringList &CmdArgs, StringRef Sanitizer, - bool IsShared) { -@@ -6527,24 +6549,6 @@ void darwin::Linker::ConstructJob(Compilation &C, - - Args.AddAllArgs(CmdArgs, options::OPT_L); - -- if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, -- options::OPT_fno_openmp, false)) { -- switch (getOpenMPRuntime(getToolChain(), Args)) { -- case OMPRT_OMP: -- CmdArgs.push_back("-lomp"); -- break; -- case OMPRT_GOMP: -- CmdArgs.push_back("-lgomp"); -- break; -- case OMPRT_IOMP5: -- CmdArgs.push_back("-liomp5"); -- break; -- case OMPRT_Unknown: -- // Already diagnosed. -- break; -- } -- } -- - AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs); - // Build the input file for -filelist (list of linker input files) in case we - // need it later -@@ -6563,6 +6567,10 @@ void darwin::Linker::ConstructJob(Compilation &C, - InputFileList.push_back(II.getFilename()); - } - -+ if (!Args.hasArg(options::OPT_nostdlib) && -+ !Args.hasArg(options::OPT_nodefaultlibs)) -+ addOpenMPRuntime(CmdArgs, getToolChain(), Args); -+ - if (isObjCRuntimeLinked(Args) && !Args.hasArg(options::OPT_nostdlib) && - !Args.hasArg(options::OPT_nodefaultlibs)) { - // We use arclite library for both ARC and subscripting support. -@@ -7358,6 +7366,7 @@ void freebsd::Linker::ConstructJob(Compilation &C, - - if (!Args.hasArg(options::OPT_nostdlib) && - !Args.hasArg(options::OPT_nodefaultlibs)) { -+ addOpenMPRuntime(CmdArgs, ToolChain, Args); - if (D.CCCIsCXX()) { - ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); - if (Args.hasArg(options::OPT_pg)) -@@ -7673,6 +7682,7 @@ void netbsd::Linker::ConstructJob(Compilation &C, - - if (!Args.hasArg(options::OPT_nostdlib) && - !Args.hasArg(options::OPT_nodefaultlibs)) { -+ addOpenMPRuntime(CmdArgs, getToolChain(), Args); - if (D.CCCIsCXX()) { - getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); - CmdArgs.push_back("-lm"); -Index: tools/clang/test/Driver/fopenmp.c -=================================================================== ---- tools/clang/test/Driver/fopenmp.c -+++ tools/clang/test/Driver/fopenmp.c -@@ -1,6 +1,15 @@ - // RUN: %clang -target x86_64-linux-gnu -fopenmp=libomp -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CC1-OPENMP - // RUN: %clang -target x86_64-linux-gnu -fopenmp=libgomp -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CC1-NO-OPENMP - // RUN: %clang -target x86_64-linux-gnu -fopenmp=libiomp5 -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CC1-OPENMP -+// RUN: %clang -target x86_64-apple-darwin -fopenmp=libomp -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CC1-OPENMP -+// RUN: %clang -target x86_64-apple-darwin -fopenmp=libgomp -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CC1-NO-OPENMP -+// RUN: %clang -target x86_64-apple-darwin -fopenmp=libiomp5 -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CC1-OPENMP -+// RUN: %clang -target x86_64-freebsd -fopenmp=libomp -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CC1-OPENMP -+// RUN: %clang -target x86_64-freebsd -fopenmp=libgomp -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CC1-NO-OPENMP -+// RUN: %clang -target x86_64-freebsd -fopenmp=libiomp5 -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CC1-OPENMP -+// RUN: %clang -target x86_64-netbsd -fopenmp=libomp -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CC1-OPENMP -+// RUN: %clang -target x86_64-netbsd -fopenmp=libgomp -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CC1-NO-OPENMP -+// RUN: %clang -target x86_64-netbsd -fopenmp=libiomp5 -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CC1-OPENMP - // - // CHECK-CC1-OPENMP: "-cc1" - // CHECK-CC1-OPENMP: "-fopenmp" -@@ -12,6 +21,30 @@ - // RUN: %clang -target x86_64-linux-gnu -fopenmp=libgomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-GOMP - // RUN: %clang -target x86_64-linux-gnu -fopenmp=libiomp5 %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-IOMP5 - // -+// RUN: %clang -nostdlib -target x86_64-linux-gnu -fopenmp=libomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-OMP -+// RUN: %clang -nostdlib -target x86_64-linux-gnu -fopenmp=libgomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-GOMP -+// RUN: %clang -nostdlib -target x86_64-linux-gnu -fopenmp=libiomp5 %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-IOMP5 -+// -+// RUN: %clang -target x86_64-darwin -fopenmp=libomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-OMP -+// RUN: %clang -target x86_64-darwin -fopenmp=libgomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-GOMP -+// RUN: %clang -target x86_64-darwin -fopenmp=libiomp5 %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-IOMP5 -+// -+// RUN: %clang -nostdlib -target x86_64-darwin -fopenmp=libomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-OMP -+// RUN: %clang -nostdlib -target x86_64-darwin -fopenmp=libgomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-GOMP -+// RUN: %clang -nostdlib -target x86_64-darwin -fopenmp=libiomp5 %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-IOMP5 -+// -+// RUN: %clang -target x86_64-netbsd -fopenmp=libomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-OMP -+// RUN: %clang -target x86_64-netbsd -fopenmp=libgomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-GOMP -+// RUN: %clang -target x86_64-netbsd -fopenmp=libiomp5 %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-IOMP5 -+// -+// RUN: %clang -nostdlib -target x86_64-freebsd -fopenmp=libomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-OMP -+// RUN: %clang -nostdlib -target x86_64-freebsd -fopenmp=libgomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-GOMP -+// RUN: %clang -nostdlib -target x86_64-freebsd -fopenmp=libiomp5 %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-IOMP5 -+// -+// RUN: %clang -nostdlib -target x86_64-netbsd -fopenmp=libomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-OMP -+// RUN: %clang -nostdlib -target x86_64-netbsd -fopenmp=libgomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-GOMP -+// RUN: %clang -nostdlib -target x86_64-netbsd -fopenmp=libiomp5 %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-NO-IOMP5 -+// - // CHECK-LD-OMP: "{{.*}}ld{{(.exe)?}}" - // CHECK-LD-OMP: "-lomp" - // -@@ -21,6 +54,15 @@ - // CHECK-LD-IOMP5: "{{.*}}ld{{(.exe)?}}" - // CHECK-LD-IOMP5: "-liomp5" - // -+// CHECK-NO-OMP: "{{.*}}ld{{(.exe)?}}" -+// CHECK-NO-OMP-NOT: "-lomp" -+// -+// CHECK-NO-GOMP: "{{.*}}ld{{(.exe)?}}" -+// CHECK-NO-GOMP-NOT: "-lgomp" -+// -+// CHECK-NO-IOMP5: "{{.*}}ld{{(.exe)?}}" -+// CHECK-NO-IOMP5-NOT: "-liomp5" -+// - // We'd like to check that the default is sane, but until we have the ability - // to *always* semantically analyze OpenMP without always generating runtime - // calls (in the event of an unsupported runtime), we don't have a good way to -@@ -28,6 +70,9 @@ - // OpenMP runtime. - // - // RUN: %clang -target x86_64-linux-gnu -fopenmp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-ANY -+// RUN: %clang -target x86_64-darwin -fopenmp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-ANY -+// RUN: %clang -target x86_64-freebsd -fopenmp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-ANY -+// RUN: %clang -target x86_64-netbsd -fopenmp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-ANY - // - // CHECK-LD-ANY: "{{.*}}ld{{(.exe)?}}" - // CHECK-LD-ANY: "-l{{(omp|gomp|iomp5)}}" diff --git a/contrib/llvm/tools/clang/lib/Basic/Targets.cpp b/contrib/llvm/tools/clang/lib/Basic/Targets.cpp index e4db004..9e44f7d 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Targets.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/Targets.cpp @@ -3968,7 +3968,13 @@ public: class MinGWX86_64TargetInfo : public WindowsX86_64TargetInfo { public: MinGWX86_64TargetInfo(const llvm::Triple &Triple) - : WindowsX86_64TargetInfo(Triple) {} + : WindowsX86_64TargetInfo(Triple) { + // Mingw64 rounds long double size and alignment up to 16 bytes, but sticks + // with x86 FP ops. Weird. + LongDoubleWidth = LongDoubleAlign = 128; + LongDoubleFormat = &llvm::APFloat::x87DoubleExtended; + } + void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override { WindowsX86_64TargetInfo::getTargetDefines(Opts, Builder); diff --git a/contrib/llvm/tools/clang/lib/Basic/Version.cpp b/contrib/llvm/tools/clang/lib/Basic/Version.cpp index ab7cff2..4b10dcd 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Version.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/Version.cpp @@ -36,7 +36,7 @@ std::string getClangRepositoryPath() { // If the SVN_REPOSITORY is empty, try to use the SVN keyword. This helps us // pick up a tag in an SVN export, for example. - StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_370/final/lib/Basic/Version.cpp $"); + StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_371/final/lib/Basic/Version.cpp $"); if (URL.empty()) { URL = SVNRepository.slice(SVNRepository.find(':'), SVNRepository.find("/lib/Basic")); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp index 3e4d7f3..0bcf59b 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp @@ -1279,12 +1279,7 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { } break; - case ABIArgInfo::Indirect: { - assert(!retAI.getIndirectAlign() && "Align unused on indirect return."); - resultType = llvm::Type::getVoidTy(getLLVMContext()); - break; - } - + case ABIArgInfo::Indirect: case ABIArgInfo::Ignore: resultType = llvm::Type::getVoidTy(getLLVMContext()); break; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp index a179ad4..c9c48c7 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp @@ -2493,6 +2493,11 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { StringRef MangledName = getMangledName(GD); + if (AA->getAliasee() == MangledName) { + Diags.Report(AA->getLocation(), diag::err_cyclic_alias); + return; + } + // If there is a definition in the module, then it wins over the alias. // This is dubious, but allow it to be safe. Just ignore the alias. llvm::GlobalValue *Entry = GetGlobalValue(MangledName); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp index 48a8b37..25bd733 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp @@ -1552,12 +1552,10 @@ public: /// WinX86_64ABIInfo - The Windows X86_64 ABI information. class WinX86_64ABIInfo : public ABIInfo { - - ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, - bool IsReturnType) const; - public: - WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {} + WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT) + : ABIInfo(CGT), + IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {} void computeInfo(CGFunctionInfo &FI) const override; @@ -1574,6 +1572,12 @@ public: // FIXME: Assumes vectorcall is in use. return isX86VectorCallAggregateSmallEnough(NumMembers); } + +private: + ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, + bool IsReturnType) const; + + bool IsMingw64; }; class X86_64TargetCodeGenInfo : public TargetCodeGenInfo { @@ -3070,11 +3074,6 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, if (RT->getDecl()->hasFlexibleArrayMember()) return ABIArgInfo::getIndirect(0, /*ByVal=*/false); - - // FIXME: mingw-w64-gcc emits 128-bit struct as i128 - if (Width == 128 && getTarget().getTriple().isWindowsGNUEnvironment()) - return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), - Width)); } // vectorcall adds the concept of a homogenous vector aggregate, similar to @@ -3116,6 +3115,14 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, if (BT && BT->getKind() == BuiltinType::Bool) return ABIArgInfo::getExtend(); + // Mingw64 GCC uses the old 80 bit extended precision floating point unit. It + // passes them indirectly through memory. + if (IsMingw64 && BT && BT->getKind() == BuiltinType::LongDouble) { + const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); + if (LDF == &llvm::APFloat::x87DoubleExtended) + return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + } + return ABIArgInfo::getDirect(); } diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp index 01966d5..9ad5aa5 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp @@ -2485,8 +2485,10 @@ bool MismatchingNewDeleteDetector::hasMatchingNewInCtor( MismatchingNewDeleteDetector::MismatchResult MismatchingNewDeleteDetector::analyzeInClassInitializer() { assert(Field != nullptr && "This should be called only for members"); - if (const CXXNewExpr *NE = - getNewExprFromInitListOrExpr(Field->getInClassInitializer())) { + const Expr *InitExpr = Field->getInClassInitializer(); + if (!InitExpr) + return EndOfTU ? NoMismatch : AnalyzeLater; + if (const CXXNewExpr *NE = getNewExprFromInitListOrExpr(InitExpr)) { if (NE->isArray() != IsArrayForm) { NewExprs.push_back(NE); return MemberInitMismatches; diff --git a/contrib/llvm/tools/llvm-lto/llvm-lto.cpp b/contrib/llvm/tools/llvm-lto/llvm-lto.cpp index 9678c83..0821898 100644 --- a/contrib/llvm/tools/llvm-lto/llvm-lto.cpp +++ b/contrib/llvm/tools/llvm-lto/llvm-lto.cpp @@ -214,8 +214,11 @@ int main(int argc, char **argv) { if (SetMergedModule && i == BaseArg) { // Transfer ownership to the code generator. CodeGen.setModule(Module.release()); - } else if (!CodeGen.addModule(Module.get())) + } else if (!CodeGen.addModule(Module.get())) { + // Print a message here so that we know addModule() did not abort. + errs() << argv[0] << ": error adding file '" << InputFilenames[i] << "'\n"; return 1; + } unsigned NumSyms = LTOMod->getSymbolCount(); for (unsigned I = 0; I < NumSyms; ++I) { |