diff options
author | dim <dim@FreeBSD.org> | 2015-12-25 21:39:45 +0000 |
---|---|---|
committer | dim <dim@FreeBSD.org> | 2015-12-25 21:39:45 +0000 |
commit | 6f44a590dad07c47cdc2fc19861574af1da36963 (patch) | |
tree | 3570671d3fc79afee8f95c67867446e3151a9d94 /contrib/llvm/lib | |
parent | 0efa1469be94566c09b9f4ce538c28e92d26026c (diff) | |
download | FreeBSD-src-6f44a590dad07c47cdc2fc19861574af1da36963.zip FreeBSD-src-6f44a590dad07c47cdc2fc19861574af1da36963.tar.gz |
Upgrade our copies of clang and llvm to 3.7.1 release. This is a
bugfix-only release, with no new features.
Please note that from 3.5.0 onwards, clang and llvm require C++11
support to build; see UPDATING for more information.
Diffstat (limited to 'contrib/llvm/lib')
40 files changed, 472 insertions, 97 deletions
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index 71c7781..a2b9316 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -169,7 +169,7 @@ void WinException::endFunction(const MachineFunction *MF) { Asm->OutStreamer->PopSection(); } - if (shouldEmitMoves) + if (shouldEmitMoves || shouldEmitPersonality) Asm->OutStreamer->EmitWinCFIEndProc(); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 21ab072..fbc8f1e 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -439,7 +439,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, ISD::ANY_EXTEND, dl, VT, Result); ValResult = Result; - ChainResult = Chain; + ChainResult = newLoad.getValue(1); return; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index a7392fa..54cfaf5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1010,6 +1010,8 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, // Calculate the element offset and add it to the pointer. unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. + assert(EltSize * 8 == EltVT.getSizeInBits() && + "Converting bits to bytes lost precision"); Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, DAG.getConstant(EltSize, dl, Index.getValueType())); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 4348ab7..51cd661 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1528,9 +1528,25 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { if (CustomLowerNode(N, N->getValueType(0), true)) return SDValue(); - // Store the vector to the stack. - EVT EltVT = VecVT.getVectorElementType(); + // Make the vector elements byte-addressable if they aren't already. SDLoc dl(N); + EVT EltVT = VecVT.getVectorElementType(); + if (EltVT.getSizeInBits() < 8) { + SmallVector<SDValue, 4> ElementOps; + for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i) { + ElementOps.push_back(DAG.getAnyExtOrTrunc( + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec, + DAG.getConstant(i, dl, MVT::i8)), + dl, MVT::i8)); + } + + EltVT = MVT::i8; + VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + VecVT.getVectorNumElements()); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, ElementOps); + } + + // Store the vector to the stack. SDValue StackPtr = DAG.CreateStackTemporary(VecVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo(), false, false, 0); diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp index adc620d..b553f11 100644 --- a/contrib/llvm/lib/IR/AsmWriter.cpp +++ b/contrib/llvm/lib/IR/AsmWriter.cpp @@ -794,6 +794,10 @@ void SlotTracker::processFunction() { ST_DEBUG("begin processFunction!\n"); fNext = 0; + // Process function metadata if it wasn't hit at the module-level. + if (!ShouldInitializeAllMetadata) + processFunctionMetadata(*TheFunction); + // Add all the function arguments with no names. for(Function::const_arg_iterator AI = TheFunction->arg_begin(), AE = TheFunction->arg_end(); AI != AE; ++AI) @@ -807,8 +811,6 @@ void SlotTracker::processFunction() { if (!BB.hasName()) CreateFunctionSlot(&BB); - processFunctionMetadata(*TheFunction); - for (auto &I : BB) { if (!I.getType()->isVoidTy() && !I.hasName()) CreateFunctionSlot(&I); @@ -836,11 +838,11 @@ void SlotTracker::processFunction() { void SlotTracker::processFunctionMetadata(const Function &F) { SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; - for (auto &BB : F) { - F.getAllMetadata(MDs); - for (auto &MD : MDs) - CreateMetadataSlot(MD.second); + F.getAllMetadata(MDs); + for (auto &MD : MDs) + CreateMetadataSlot(MD.second); + for (auto &BB : F) { for (auto &I : BB) processInstructionMetadata(I); } diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp index e0e729d..0eb88a9 100644 --- a/contrib/llvm/lib/IR/Core.cpp +++ b/contrib/llvm/lib/IR/Core.cpp @@ -2257,7 +2257,14 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn, } LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty, - unsigned NumClauses, const char *Name) { + LLVMValueRef PersFn, unsigned NumClauses, + const char *Name) { + // The personality used to live on the landingpad instruction, but now it + // lives on the parent function. For compatibility, take the provided + // personality and put it on the parent function. + if (PersFn) + unwrap(B)->GetInsertBlock()->getParent()->setPersonalityFn( + cast<Function>(unwrap(PersFn))); return wrap(unwrap(B)->CreateLandingPad(unwrap(Ty), NumClauses, Name)); } diff --git a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp index 149ec6a..25ae4ac 100644 --- a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -63,14 +63,21 @@ const char* LTOCodeGenerator::getVersionString() { #endif } +static void handleLTODiagnostic(const DiagnosticInfo &DI) { + DiagnosticPrinterRawOStream DP(errs()); + DI.print(DP); + errs() << "\n"; +} + LTOCodeGenerator::LTOCodeGenerator() - : Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context)) { + : Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context), + handleLTODiagnostic) { initializeLTOPasses(); } LTOCodeGenerator::LTOCodeGenerator(std::unique_ptr<LLVMContext> Context) : OwnedContext(std::move(Context)), Context(*OwnedContext), - IRLinker(new Module("ld-temp.o", *OwnedContext)) { + IRLinker(new Module("ld-temp.o", *OwnedContext), handleLTODiagnostic) { initializeLTOPasses(); } diff --git a/contrib/llvm/lib/MC/MCContext.cpp b/contrib/llvm/lib/MC/MCContext.cpp index c601c56..a85796c 100644 --- a/contrib/llvm/lib/MC/MCContext.cpp +++ b/contrib/llvm/lib/MC/MCContext.cpp @@ -82,6 +82,7 @@ void MCContext::reset() { UsedNames.clear(); Symbols.clear(); + SectionSymbols.clear(); Allocator.Reset(); Instances.clear(); CompilationDir.clear(); diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 709d753..0a5309b 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -264,6 +264,12 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, for (const MachineBasicBlock &MBB : MF) { for (const MachineInstr &MI : MBB) { // TODO: CodeSize should account for multiple functions. + + // TODO: Should we count size of debug info? + if (MI.isDebugValue()) + continue; + + // FIXME: This is reporting 0 for many instructions. CodeSize += MI.getDesc().Size; unsigned numOperands = MI.getNumOperands(); diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 4a65bfc..57b7a73 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -134,13 +134,17 @@ static Value* GEPToVectorIndex(GetElementPtrInst *GEP) { // // TODO: Check isTriviallyVectorizable for calls and handle other // instructions. -static bool canVectorizeInst(Instruction *Inst) { +static bool canVectorizeInst(Instruction *Inst, User *User) { switch (Inst->getOpcode()) { case Instruction::Load: - case Instruction::Store: case Instruction::BitCast: case Instruction::AddrSpaceCast: return true; + case Instruction::Store: { + // Must be the stored pointer operand, not a stored value. + StoreInst *SI = cast<StoreInst>(Inst); + return SI->getPointerOperand() == User; + } default: return false; } @@ -166,7 +170,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) { for (User *AllocaUser : Alloca->users()) { GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser); if (!GEP) { - if (!canVectorizeInst(cast<Instruction>(AllocaUser))) + if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca)) return false; WorkList.push_back(AllocaUser); @@ -184,7 +188,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) { GEPVectorIdx[GEP] = Index; for (User *GEPUser : AllocaUser->users()) { - if (!canVectorizeInst(cast<Instruction>(GEPUser))) + if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser)) return false; WorkList.push_back(GEPUser); @@ -240,7 +244,12 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) { for (User *User : Val->users()) { if(std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end()) continue; - if (isa<CallInst>(User)) { + if (CallInst *CI = dyn_cast<CallInst>(User)) { + // TODO: We might be able to handle some cases where the callee is a + // constantexpr bitcast of a function. + if (!CI->getCalledFunction()) + return false; + WorkList.push_back(User); continue; } @@ -250,6 +259,12 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) { if (UseInst && UseInst->getOpcode() == Instruction::PtrToInt) return false; + if (StoreInst *SI = dyn_cast_or_null<StoreInst>(UseInst)) { + // Reject if the stored value is not the pointer operand. + if (SI->getPointerOperand() != Val) + return false; + } + if (!User->getType()->isPointerTy()) continue; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td index 835a146..ba0490a 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td @@ -14,8 +14,7 @@ let Namespace = "AMDGPU" in { foreach Index = 0-15 in { - // Indices are used in a variety of ways here, so don't set a size/offset. - def sub#Index : SubRegIndex<-1, -1>; + def sub#Index : SubRegIndex<32, !shl(Index, 5)>; } def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">; diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 468563c..4434d9b 100644 --- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -71,12 +71,26 @@ void AMDGPUMCObjectWriter::writeObject(MCAssembler &Asm, } } +static unsigned getFixupKindNumBytes(unsigned Kind) { + switch (Kind) { + case FK_Data_1: + return 1; + case FK_Data_2: + return 2; + case FK_Data_4: + return 4; + case FK_Data_8: + return 8; + default: + llvm_unreachable("Unknown fixup kind!"); + } +} + void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const { switch ((unsigned)Fixup.getKind()) { - default: llvm_unreachable("Unknown fixup kind"); case AMDGPU::fixup_si_sopp_br: { uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset()); *Dst = (Value - 4) / 4; @@ -96,6 +110,24 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, *Dst = Value + 4; break; } + default: { + // FIXME: Copied from AArch64 + unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); + if (!Value) + return; // Doesn't change encoding. + MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind()); + + // Shift the value into position. + Value <<= Info.TargetOffset; + + unsigned Offset = Fixup.getOffset(); + assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + + // For each byte of the fragment that the fixup touches, mask in the + // bits from the fixup value. + for (unsigned i = 0; i != NumBytes; ++i) + Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); + } } } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 099b0b1..c2db9ff 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -157,6 +157,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, setTruncStoreAction(MVT::i64, MVT::i32, Expand); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); + setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); setOperationAction(ISD::LOAD, MVT::i1, Custom); @@ -2252,10 +2253,8 @@ MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG, SDValue Ptr) const { const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); - uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE | - 0xffffffff; // Size - return buildRSRC(DAG, DL, Ptr, 0, Rsrc); + return buildRSRC(DAG, DL, Ptr, 0, TII->getScratchRsrcWords23()); } SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG, diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 1891061..cfd2c42 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2778,3 +2778,16 @@ uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { return RsrcDataFormat; } + +uint64_t SIInstrInfo::getScratchRsrcWords23() const { + uint64_t Rsrc23 = getDefaultRsrcDataFormat() | + AMDGPU::RSRC_TID_ENABLE | + 0xffffffff; // Size; + + // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17]. + // Clear them unless we want a huge stride. + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT; + + return Rsrc23; +} diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 015ea12..5053786 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -353,7 +353,7 @@ public: } uint64_t getDefaultRsrcDataFormat() const; - + uint64_t getScratchRsrcWords23() const; }; namespace AMDGPU { diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td index f78ffd7..e0eeea9 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1548,6 +1548,12 @@ defm V_WRITELANE_B32 : VOP2SI_3VI_m < // These instructions only exist on SI and CI let SubtargetPredicate = isSICI in { +let isCommutable = 1 in { +defm V_MAC_LEGACY_F32 : VOP2InstSI <vop2<0x6>, "v_mac_legacy_f32", + VOP_F32_F32_F32 +>; +} // End isCommutable = 1 + defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy >; @@ -1562,12 +1568,6 @@ defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32>; } // End isCommutable = 1 } // End let SubtargetPredicate = SICI -let isCommutable = 1 in { -defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32", - VOP_F32_F32_F32 ->; -} // End isCommutable = 1 - defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", VOP_I32_I32_I32 >; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp b/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp index 0a7f684..2cd600d 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp @@ -135,8 +135,7 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { unsigned ScratchRsrcReg = RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0); - uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE | - 0xffffffff; // Size + uint64_t Rsrc23 = TII->getScratchRsrcWords23(); unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); @@ -152,11 +151,11 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { .addReg(ScratchRsrcReg, RegState::ImplicitDefine); BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2) - .addImm(Rsrc & 0xffffffff) + .addImm(Rsrc23 & 0xffffffff) .addReg(ScratchRsrcReg, RegState::ImplicitDefine); BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3) - .addImm(Rsrc >> 32) + .addImm(Rsrc23 >> 32) .addReg(ScratchRsrcReg, RegState::ImplicitDefine); // Scratch Offset diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 54c4d54..e9e8412 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -26,23 +26,25 @@ using namespace llvm; SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {} -BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { - BitVector Reserved(getNumRegs()); - Reserved.set(AMDGPU::EXEC); +void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const { + MCRegAliasIterator R(Reg, this, true); - // EXEC_LO and EXEC_HI could be allocated and used as regular register, - // but this seems likely to result in bugs, so I'm marking them as reserved. - Reserved.set(AMDGPU::EXEC_LO); - Reserved.set(AMDGPU::EXEC_HI); + for (; R.isValid(); ++R) + Reserved.set(*R); +} +BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); Reserved.set(AMDGPU::INDIRECT_BASE_ADDR); - Reserved.set(AMDGPU::FLAT_SCR); - Reserved.set(AMDGPU::FLAT_SCR_LO); - Reserved.set(AMDGPU::FLAT_SCR_HI); + + // EXEC_LO and EXEC_HI could be allocated and used as regular register, but + // this seems likely to result in bugs, so I'm marking them as reserved. + reserveRegisterTuples(Reserved, AMDGPU::EXEC); + reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR); // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs - Reserved.set(AMDGPU::VGPR255); - Reserved.set(AMDGPU::VGPR254); + reserveRegisterTuples(Reserved, AMDGPU::VGPR254); + reserveRegisterTuples(Reserved, AMDGPU::VGPR255); // Tonga and Iceland can only allocate a fixed number of SGPRs due // to a hw bug. @@ -54,10 +56,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { for (unsigned i = Limit; i < NumSGPRs; ++i) { unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); - MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true); - - for (; R.isValid(); ++R) - Reserved.set(*R); + reserveRegisterTuples(Reserved, Reg); } } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index bfdb67c..7da6de2 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -23,7 +23,10 @@ namespace llvm { struct SIRegisterInfo : public AMDGPURegisterInfo { +private: + void reserveRegisterTuples(BitVector &, unsigned Reg) const; +public: SIRegisterInfo(); BitVector getReservedRegs(const MachineFunction &MF) const override; diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index f8f0eb2..cf6b892 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" @@ -9104,6 +9105,10 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) { return false; } + Triple T; + STI.setDefaultFeatures(T.getARMCPUForArch(Arch)); + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + getTargetStreamer().emitArch(ID); return false; } diff --git a/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp index d9e654c..9d5f1d4 100644 --- a/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -50,6 +50,7 @@ private: // Complex Pattern for address selection. bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset); + bool SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset); }; } @@ -67,7 +68,7 @@ bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { Addr.getOpcode() == ISD::TargetGlobalAddress) return false; - // Addresses of the form FI+const or FI|const + // Addresses of the form Addr+const or Addr|const if (CurDAG->isBaseWithConstantOffset(Addr)) { ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); if (isInt<32>(CN->getSExtValue())) { @@ -89,6 +90,31 @@ bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { return true; } +// ComplexPattern used on BPF FI instruction +bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { + SDLoc DL(Addr); + + if (!CurDAG->isBaseWithConstantOffset(Addr)) + return false; + + // Addresses of the form Addr+const or Addr|const + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + if (isInt<32>(CN->getSExtValue())) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = + dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); + else + return false; + + Offset = CurDAG->getTargetConstant(CN->getSExtValue(), DL, MVT::i64); + return true; + } + + return false; +} + SDNode *BPFDAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); @@ -104,13 +130,6 @@ SDNode *BPFDAGToDAGISel::Select(SDNode *Node) { // tablegen selection should be handled here. switch (Opcode) { default: break; - - case ISD::UNDEF: { - errs() << "BUG: "; Node->dump(CurDAG); errs() << '\n'; - report_fatal_error("shouldn't see UNDEF during Select"); - break; - } - case ISD::INTRINSIC_W_CHAIN: { unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); switch (IntNo) { diff --git a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp index 58498a1..7341828 100644 --- a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -102,6 +102,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BR_CC, MVT::i64, Custom); setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BRIND, MVT::Other, Expand); setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::SETCC, MVT::i64, Expand); setOperationAction(ISD::SELECT, MVT::i64, Expand); @@ -128,9 +129,6 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SUBC, MVT::i64, Expand); setOperationAction(ISD::SUBE, MVT::i64, Expand); - // no UNDEF allowed - setOperationAction(ISD::UNDEF, MVT::i64, Expand); - setOperationAction(ISD::ROTR, MVT::i64, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); diff --git a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td index 26b2cfe..6b73db8 100644 --- a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -54,7 +54,8 @@ def i64immSExt32 : PatLeaf<(imm), [{return isInt<32>(N->getSExtValue()); }]>; // Addressing modes. -def ADDRri : ComplexPattern<i64, 2, "SelectAddr", [frameindex], []>; +def ADDRri : ComplexPattern<i64, 2, "SelectAddr", [], []>; +def FIri : ComplexPattern<i64, 2, "SelectFIAddr", [add, or], []>; // Address operands def MEMri : Operand<i64> { @@ -260,6 +261,15 @@ def MOV_rr : MOV_RR<"mov">; def MOV_ri : MOV_RI<"mov">; } +def FI_ri + : InstBPF<(outs GPR:$dst), (ins MEMri:$addr), + "lea\t$dst, $addr", + [(set i64:$dst, FIri:$addr)]> { + // This is a tentative instruction, and will be replaced + // with MOV_rr and ADD_ri in PEI phase +} + + def LD_pseudo : InstBPF<(outs GPR:$dst), (ins i64imm:$pseudo, u64imm:$imm), "ld_pseudo\t$dst, $pseudo, $imm", diff --git a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp index 8f885c3..952615b 100644 --- a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp @@ -58,14 +58,13 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned FrameReg = getFrameRegister(MF); int FrameIndex = MI.getOperand(i).getIndex(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + MachineBasicBlock &MBB = *MI.getParent(); if (MI.getOpcode() == BPF::MOV_rr) { - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex); MI.getOperand(i).ChangeToRegister(FrameReg, false); - - MachineBasicBlock &MBB = *MI.getParent(); unsigned reg = MI.getOperand(i - 1).getReg(); BuildMI(MBB, ++II, DL, TII.get(BPF::ADD_ri), reg) .addReg(reg) @@ -79,8 +78,24 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (!isInt<32>(Offset)) llvm_unreachable("bug in frame offset"); - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i + 1).ChangeToImmediate(Offset); + if (MI.getOpcode() == BPF::FI_ri) { + // architecture does not really support FI_ri, replace it with + // MOV_rr <target_reg>, frame_reg + // ADD_ri <target_reg>, imm + unsigned reg = MI.getOperand(i - 1).getReg(); + + BuildMI(MBB, ++II, DL, TII.get(BPF::MOV_rr), reg) + .addReg(FrameReg); + BuildMI(MBB, II, DL, TII.get(BPF::ADD_ri), reg) + .addReg(reg) + .addImm(Offset); + + // Remove FI_ri instruction + MI.eraseFromParent(); + } else { + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i + 1).ChangeToImmediate(Offset); + } } unsigned BPFRegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h index 6fe8f83..b3d861d 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h @@ -269,6 +269,14 @@ namespace llvm { unsigned getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const override; + /// Returns true if a cast between SrcAS and DestAS is a noop. + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { + // Mips doesn't have any special address spaces so we just reserve + // the first 256 for software use (e.g. OpenCL) and treat casts + // between them as noops. + return SrcAS < 256 && DestAS < 256; + } + protected: SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const; diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index cb46d73..2ebfbd1 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -115,6 +115,11 @@ bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI, if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo()) continue; + // Also, we have to check that the register class of the operand + // contains the zero register. + if (!MRI->getRegClass(MO.getReg())->contains(ZeroReg)) + continue; + MO.setReg(ZeroReg); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 4444466..8e118ec 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -947,11 +947,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::ADDISdtprelHA: - // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym> - // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha + // Transform: %Xd = ADDISdtprelHA %Xs, <ga:@sym> + // Into: %Xd = ADDIS8 %Xs, sym@dtprel@ha case PPC::ADDISdtprelHA32: { - // Transform: %Rd = ADDISdtprelHA32 %R3, <ga:@sym> - // Into: %Rd = ADDIS %R3, sym@dtprel@ha + // Transform: %Rd = ADDISdtprelHA32 %Rs, <ga:@sym> + // Into: %Rd = ADDIS %Rs, sym@dtprel@ha const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -962,7 +962,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { *OutStreamer, MCInstBuilder(Subtarget->isPPC64() ? PPC::ADDIS8 : PPC::ADDIS) .addReg(MI->getOperand(0).getReg()) - .addReg(Subtarget->isPPC64() ? PPC::X3 : PPC::R3) + .addReg(MI->getOperand(1).getReg()) .addExpr(SymDtprel)); return; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp index baadf08..fd150be 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -197,10 +197,18 @@ static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { // Determining the address of a TLS variable results in a function call in // certain TLS models. static bool memAddrUsesCTR(const PPCTargetMachine *TM, - const llvm::Value *MemAddr) { + const Value *MemAddr) { const auto *GV = dyn_cast<GlobalValue>(MemAddr); - if (!GV) + if (!GV) { + // Recurse to check for constants that refer to TLS global variables. + if (const auto *CV = dyn_cast<Constant>(MemAddr)) + for (const auto &CO : CV->operands()) + if (memAddrUsesCTR(TM, CO)) + return true; + return false; + } + if (!GV->isThreadLocal()) return false; if (!TM) @@ -239,6 +247,11 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { switch (F->getIntrinsicID()) { default: continue; + // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr + // we're definitely using CTR. + case Intrinsic::ppc_is_decremented_ctr_nonzero: + case Intrinsic::ppc_mtctr: + return true; // VisualStudio defines setjmp as _setjmp #if defined(_MSC_VER) && defined(setjmp) && \ @@ -426,6 +439,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { // Process nested loops first. for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { MadeChange |= convertToCTRLoop(*I); + DEBUG(dbgs() << "Nested loop converted\n"); } // If a nested loop has been converted, then we can't convert this loop. diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index b6025bf..9322268 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -2570,13 +2570,25 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return nullptr; } // ISD::OR doesn't get all the bitfield insertion fun. - // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert + // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a + // bitfield insert. if (isInt32Immediate(N->getOperand(1), Imm) && N->getOperand(0).getOpcode() == ISD::OR && isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) { + // The idea here is to check whether this is equivalent to: + // (c1 & m) | (x & ~m) + // where m is a run-of-ones mask. The logic here is that, for each bit in + // c1 and c2: + // - if both are 1, then the output will be 1. + // - if both are 0, then the output will be 0. + // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will + // come from x. + // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will + // be 0. + // If that last condition is never the case, then we can form m from the + // bits that are the same between c1 and c2. unsigned MB, ME; - Imm = ~(Imm^Imm2); - if (isRunOfOnes(Imm, MB, ME)) { + if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) { SDValue Ops[] = { N->getOperand(0).getOperand(0), N->getOperand(0).getOperand(1), getI32Imm(0, dl), getI32Imm(MB, dl), @@ -2787,6 +2799,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Base, Offset; if (LD->isUnindexed() && + (LD->getMemoryVT() == MVT::f64 || + LD->getMemoryVT() == MVT::i64) && SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { SDValue Chain = LD->getChain(); SDValue Ops[] = { Base, Offset, Chain }; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 1e28913..1b8f8fb 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -431,6 +431,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); setOperationAction(ISD::SELECT, VT, Promote); AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); + setOperationAction(ISD::SELECT_CC, VT, Promote); + AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32); setOperationAction(ISD::STORE, VT, Promote); AddPromotedToType (ISD::STORE, VT, MVT::v4i32); @@ -7175,7 +7177,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, PPC::isSplatShuffleMask(SVOp, 4) || PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || - PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) || PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || @@ -7183,8 +7184,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) || - PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || - PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)) { + (Subtarget.hasP8Altivec() && ( + PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) { return Op; } } @@ -7195,7 +7198,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, unsigned int ShuffleKind = isLittleEndian ? 2 : 0; if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || - PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) || PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || @@ -7203,8 +7205,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) || - PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || - PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)) + (Subtarget.hasP8Altivec() && ( + PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || + PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)))) return Op; // Check to see if this is a shuffle of 4-byte values. If so, we can use our diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index bf6e402..d4e666c 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -309,6 +309,11 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { unsigned MB = MI->getOperand(4).getImm(); unsigned ME = MI->getOperand(5).getImm(); + // We can't commute a trivial mask (there is no way to represent an all-zero + // mask). + if (MB == 0 && ME == 31) + return nullptr; + if (NewMI) { // Create a new instruction. unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg(); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td index b50124d..24fd9bd 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -2835,24 +2835,84 @@ def : Pat<(i64 (anyext i1:$in)), (SELECT_I8 $in, (LI8 1), (LI8 0))>; // match setcc on i1 variables. +// CRANDC is: +// 1 1 : F +// 1 0 : T +// 0 1 : F +// 0 0 : F +// +// LT is: +// -1 -1 : F +// -1 0 : T +// 0 -1 : F +// 0 0 : F +// +// ULT is: +// 1 1 : F +// 1 0 : F +// 0 1 : T +// 0 0 : F def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLT)), - (CRANDC $s2, $s1)>; + (CRANDC $s1, $s2)>; def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULT)), (CRANDC $s2, $s1)>; +// CRORC is: +// 1 1 : T +// 1 0 : T +// 0 1 : F +// 0 0 : T +// +// LE is: +// -1 -1 : T +// -1 0 : T +// 0 -1 : F +// 0 0 : T +// +// ULE is: +// 1 1 : T +// 1 0 : F +// 0 1 : T +// 0 0 : T def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLE)), - (CRORC $s2, $s1)>; + (CRORC $s1, $s2)>; def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULE)), (CRORC $s2, $s1)>; + def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETEQ)), (CREQV $s1, $s2)>; + +// GE is: +// -1 -1 : T +// -1 0 : F +// 0 -1 : T +// 0 0 : T +// +// UGE is: +// 1 1 : T +// 1 0 : T +// 0 1 : F +// 0 0 : T def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGE)), - (CRORC $s1, $s2)>; + (CRORC $s2, $s1)>; def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGE)), (CRORC $s1, $s2)>; + +// GT is: +// -1 -1 : F +// -1 0 : F +// 0 -1 : T +// 0 0 : F +// +// UGT is: +// 1 1 : F +// 1 0 : T +// 0 1 : F +// 0 0 : F def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGT)), - (CRANDC $s1, $s2)>; + (CRANDC $s2, $s1)>; def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGT)), (CRANDC $s1, $s2)>; + def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETNE)), (CRXOR $s1, $s2)>; @@ -3203,18 +3263,30 @@ def : Pat<(i1 (select i1:$cond, i1:$tval, i1:$fval)), // select (lhs == rhs), tval, fval is: // ((lhs == rhs) & tval) | (!(lhs == rhs) & fval) def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLT)), + (CROR (CRAND (CRANDC $lhs, $rhs), $tval), + (CRAND (CRORC $rhs, $lhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETULT)), (CROR (CRAND (CRANDC $rhs, $lhs), $tval), (CRAND (CRORC $lhs, $rhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLE)), + (CROR (CRAND (CRORC $lhs, $rhs), $tval), + (CRAND (CRANDC $rhs, $lhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETULE)), (CROR (CRAND (CRORC $rhs, $lhs), $tval), (CRAND (CRANDC $lhs, $rhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETEQ)), (CROR (CRAND (CREQV $lhs, $rhs), $tval), (CRAND (CRXOR $lhs, $rhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGE)), + (CROR (CRAND (CRORC $rhs, $lhs), $tval), + (CRAND (CRANDC $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETUGE)), (CROR (CRAND (CRORC $lhs, $rhs), $tval), (CRAND (CRANDC $rhs, $lhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGT)), + (CROR (CRAND (CRANDC $rhs, $lhs), $tval), + (CRAND (CRORC $lhs, $rhs), $fval))>; +def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETUGT)), (CROR (CRAND (CRANDC $lhs, $rhs), $tval), (CRAND (CRORC $rhs, $lhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)), @@ -3223,66 +3295,106 @@ def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)), // match selectcc on i1 variables with non-i1 output. def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLT)), + (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETULT)), (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLE)), + (SELECT_I4 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETULE)), (SELECT_I4 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETEQ)), (SELECT_I4 (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGE)), + (SELECT_I4 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETUGE)), (SELECT_I4 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGT)), + (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETUGT)), (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETNE)), (SELECT_I4 (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLT)), + (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETULT)), (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLE)), + (SELECT_I8 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETULE)), (SELECT_I8 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETEQ)), (SELECT_I8 (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGE)), + (SELECT_I8 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETUGE)), (SELECT_I8 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGT)), + (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETUGT)), (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETNE)), (SELECT_I8 (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), + (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)), (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), + (SELECT_F4 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)), (SELECT_F4 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)), (SELECT_F4 (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), + (SELECT_F4 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)), (SELECT_F4 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), + (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)), (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), (SELECT_F4 (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), + (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)), (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)), + (SELECT_F8 (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)), (SELECT_F8 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)), (SELECT_F8 (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)), + (SELECT_F8 (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)), (SELECT_F8 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)), + (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)), (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), (SELECT_F8 (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLT)), + (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETULT)), (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLE)), + (SELECT_VRRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETULE)), (SELECT_VRRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETEQ)), (SELECT_VRRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGE)), + (SELECT_VRRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGE)), (SELECT_VRRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGT)), + (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGT)), (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)), (SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td index 5c66b42..0a044c5 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td @@ -1115,40 +1115,64 @@ def : Pat<(v4f64 (PPCqbflt v4i1:$src)), (COPY_TO_REGCLASS $src, QFRC)>; def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLT)), + (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULT)), (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLE)), + (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETULE)), (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETEQ)), (SELECT_QFRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGE)), + (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGE)), (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGT)), + (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETUGT)), (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETNE)), (SELECT_QFRC (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLT)), + (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULT)), (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLE)), + (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETULE)), (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETEQ)), (SELECT_QSRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGE)), + (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGE)), (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGT)), + (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETUGT)), (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETNE)), (SELECT_QSRC (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLT)), + (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULT)), (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLE)), + (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETULE)), (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETEQ)), (SELECT_QBRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGE)), + (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGE)), (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGT)), + (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETUGT)), (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETNE)), (SELECT_QBRC (CRXOR $lhs, $rhs), $tval, $fval)>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 20c95fe..ce63c22 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -958,27 +958,43 @@ def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>; // Selects. def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)), + (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULT)), (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)), + (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULE)), (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)), (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)), + (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGE)), (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)), + (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGT)), (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)), (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), + (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)), (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)), + (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)), (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)), (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)), + (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)), (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)), + (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>; +def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)), (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>; @@ -1060,18 +1076,27 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>; def : Pat<(f64 (fextend f32:$src)), (COPY_TO_REGCLASS $src, VSFRC)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), + (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)), (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), + (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)), (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)), (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), + (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)), (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), + (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>; + def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)), (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), - (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>; + (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>; // VSX Elementary Scalar FP arithmetic (SP) let isCommutable = 1 in { diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index 58d3c3d..46b8d13 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -103,6 +103,11 @@ protected: VNInfo *AddendValNo = LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn(); + if (!AddendValNo) { + // This can be null if the register is undef. + continue; + } + MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def); // The addend and this instruction must be in the same block. @@ -181,11 +186,14 @@ protected: if (!KilledProdOp) continue; - // For virtual registers, verify that the addend source register - // is live here (as should have been assured above). - assert((!TargetRegisterInfo::isVirtualRegister(AddendSrcReg) || - LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) && - "Addend source register is not live!"); + // If the addend copy is used only by this MI, then the addend source + // register is likely not live here. This could be fixed (based on the + // legality checks above, the live range for the addend source register + // could be extended), but it seems likely that such a trivial copy can + // be coalesced away later, and thus is not worth the effort. + if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) && + !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) + continue; // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3. diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 3fb1dcc..d7132d5 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -240,6 +240,9 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { for (MachineBasicBlock &MBB : *MF) { for (MachineInstr &MI : MBB) { + if (MI.isDebugValue()) + continue; + bool RelevantInstr = false; bool Partial = false; diff --git a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index 4a33f7f..1c4e486 100644 --- a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -77,7 +77,7 @@ class SparcAsmParser : public MCTargetAsmParser { bool parseDirectiveWord(unsigned Size, SMLoc L); bool is64Bit() const { - return STI.getTargetTriple().getArchName().startswith("sparcv9"); + return STI.getTargetTriple().getArch() == Triple::sparcv9; } void expandSET(MCInst &Inst, SMLoc IDLoc, diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 88e5e47..909baae 100644 --- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -228,7 +228,7 @@ void PassManagerBuilder::populateModulePassManager( // Start of function pass. // Break up aggregate allocas, using SSAUpdater. if (UseNewSROA) - MPM.add(createSROAPass(/*RequiresDomTree*/ false)); + MPM.add(createSROAPass()); else MPM.add(createScalarReplAggregatesPass(-1, false)); MPM.add(createEarlyCSEPass()); // Catch trivial redundancies diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp index d1eba6e..89a0d0a 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1761,7 +1761,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { if (isa<PHINode>(V)) V->takeName(LI); if (Instruction *I = dyn_cast<Instruction>(V)) - I->setDebugLoc(LI->getDebugLoc()); + if (LI->getDebugLoc()) + I->setDebugLoc(LI->getDebugLoc()); if (V->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); markInstructionForDeletion(LI); diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index 50ca623..ba8af47 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -869,6 +869,11 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { PN->replaceAllUsesWith(*Inserted.first); PN->eraseFromParent(); Changed = true; + + // The RAUW can change PHIs that we already visited. Start over from the + // beginning. + PHISet.clear(); + I = BB->begin(); } } |