diff options
Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC')
26 files changed, 586 insertions, 296 deletions
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 5c38fe1..30f232a 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -51,10 +51,9 @@ static MCInstrInfo *createPPCMCInstrInfo() { return X; } -static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) { - Triple TheTriple(TT); - bool isPPC64 = (TheTriple.getArch() == Triple::ppc64 || - TheTriple.getArch() == Triple::ppc64le); +static MCRegisterInfo *createPPCMCRegisterInfo(const Triple &TT) { + bool isPPC64 = + (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le); unsigned Flavour = isPPC64 ? 0 : 1; unsigned RA = isPPC64 ? PPC::LR8 : PPC::LR; @@ -65,9 +64,7 @@ static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) { static MCSubtargetInfo *createPPCMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitPPCMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createPPCMCSubtargetInfoImpl(TT, CPU, FS); } static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, @@ -90,22 +87,20 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createPPCMCCodeGenInfo(const Triple &TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); if (RM == Reloc::Default) { - Triple T(TT); - if (T.isOSDarwin()) + if (TT.isOSDarwin()) RM = Reloc::DynamicNoPIC; else RM = Reloc::Static; } if (CM == CodeModel::Default) { - Triple T(TT); - if (!T.isOSDarwin() && - (T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le)) + if (!TT.isOSDarwin() && + (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le)) CM = CodeModel::Medium; } X->initMCCodeGenInfo(RM, CM, OL); @@ -231,7 +226,7 @@ static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S, static MCTargetStreamer * createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { const Triple &TT = STI.getTargetTriple(); - if (TT.getObjectFormat() == Triple::ELF) + if (TT.isOSBinFormatELF()) return new PPCTargetELFStreamer(S); return new PPCTargetMachOStreamer(S); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 87a5236..199a0de 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -197,7 +197,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, // External or weakly linked global variables need non-lazily-resolved stubs if (TM.getRelocationModel() != Reloc::Static && - (GV->isDeclaration() || GV->isWeakForLinker())) { + !GV->isStrongDefinitionForLinker()) { if (!GV->hasHiddenVisibility()) { SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); MachineModuleInfoImpl::StubValueTy &StubSym = @@ -369,28 +369,70 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget && "High 16 bits of call target should be zero."); unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); - EncodedBytes = 6*4; + EncodedBytes = 0; // Materialize the jump address: EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI8) .addReg(ScratchReg) .addImm((CallTarget >> 32) & 0xFFFF)); + ++EncodedBytes; EmitToStreamer(OutStreamer, MCInstBuilder(PPC::RLDIC) .addReg(ScratchReg) .addReg(ScratchReg) .addImm(32).addImm(16)); + ++EncodedBytes; EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORIS8) .addReg(ScratchReg) .addReg(ScratchReg) .addImm((CallTarget >> 16) & 0xFFFF)); + ++EncodedBytes; EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORI8) .addReg(ScratchReg) .addReg(ScratchReg) .addImm(CallTarget & 0xFFFF)); + // Save the current TOC pointer before the remote call. + int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40; + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::STD) + .addReg(PPC::X2) + .addImm(TOCSaveOffset) + .addReg(PPC::X1)); + ++EncodedBytes; + + + // If we're on ELFv1, then we need to load the actual function pointer from + // the function descriptor. + if (!Subtarget->isELFv2ABI()) { + // Load the new TOC pointer and the function address, but not r11 + // (needing this is rare, and loading it here would prevent passing it + // via a 'nest' parameter. + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD) + .addReg(PPC::X2) + .addImm(8) + .addReg(ScratchReg)); + ++EncodedBytes; + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD) + .addReg(ScratchReg) + .addImm(0) + .addReg(ScratchReg)); + ++EncodedBytes; + } + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR8).addReg(ScratchReg)); + ++EncodedBytes; EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTRL8)); + ++EncodedBytes; + + // Restore the TOC pointer after the call. + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD) + .addReg(PPC::X2) + .addImm(TOCSaveOffset) + .addReg(PPC::X1)); + ++EncodedBytes; } + // Each instruction is 4 bytes. + EncodedBytes *= 4; + // Emit padding. unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm(); assert(NumBytes >= EncodedBytes && @@ -624,7 +666,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { IsExternal = GV->isDeclaration(); IsCommon = GV->hasCommonLinkage(); IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() && - (GV->isDeclaration() || GV->isWeakForLinker()); + !GV->isStrongDefinitionForLinker(); IsAvailExt = GV->hasAvailableExternallyLinkage(); } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); @@ -706,7 +748,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MOSymbol = getSymbol(GV); IsExternal = GV->isDeclaration(); IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() && - (GV->isDeclaration() || GV->isWeakForLinker()); + !GV->isStrongDefinitionForLinker(); } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp index 4161317..baadf08 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -351,8 +351,9 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { Opcode = ISD::FTRUNC; break; } - MVT VTy = - TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true); + auto &DL = CI->getModule()->getDataLayout(); + MVT VTy = TLI->getSimpleValueType(DL, CI->getArgOperand(0)->getType(), + true); if (VTy == MVT::Other) return true; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td index 874a6fc..5bc9124 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -133,6 +133,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[ // register having an odd register number. CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>, + // The 'nest' parameter, if any, is passed in R11. + CCIfNest<CCAssignToReg<[R11]>>, + // The first 8 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp index fafcd76..5f236f7 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -262,7 +262,7 @@ static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) { // fast-isel, and return its equivalent machine type in VT. // FIXME: Copied directly from ARM -- factor into base class? bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) { - EVT Evt = TLI.getValueType(Ty, true); + EVT Evt = TLI.getValueType(DL, Ty, true); // Only handle simple types. if (Evt == MVT::Other || !Evt.isSimple()) return false; @@ -324,12 +324,13 @@ bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) { return PPCComputeAddress(U->getOperand(0), Addr); case Instruction::IntToPtr: // Look past no-op inttoptrs. - if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return PPCComputeAddress(U->getOperand(0), Addr); break; case Instruction::PtrToInt: // Look past no-op ptrtoints. - if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return PPCComputeAddress(U->getOperand(0), Addr); break; case Instruction::GetElementPtr: { @@ -799,7 +800,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) { bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, bool IsZExt, unsigned DestReg) { Type *Ty = SrcValue1->getType(); - EVT SrcEVT = TLI.getValueType(Ty, true); + EVT SrcEVT = TLI.getValueType(DL, Ty, true); if (!SrcEVT.isSimple()) return false; MVT SrcVT = SrcEVT.getSimpleVT(); @@ -893,8 +894,8 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, // Attempt to fast-select a floating-point extend instruction. bool PPCFastISel::SelectFPExt(const Instruction *I) { Value *Src = I->getOperand(0); - EVT SrcVT = TLI.getValueType(Src->getType(), true); - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::f32 || DestVT != MVT::f64) return false; @@ -911,8 +912,8 @@ bool PPCFastISel::SelectFPExt(const Instruction *I) { // Attempt to fast-select a floating-point truncate instruction. bool PPCFastISel::SelectFPTrunc(const Instruction *I) { Value *Src = I->getOperand(0); - EVT SrcVT = TLI.getValueType(Src->getType(), true); - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::f64 || DestVT != MVT::f32) return false; @@ -992,7 +993,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { return false; Value *Src = I->getOperand(0); - EVT SrcEVT = TLI.getValueType(Src->getType(), true); + EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true); if (!SrcEVT.isSimple()) return false; @@ -1157,7 +1158,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { // Attempt to fast-select a binary integer operation that isn't already // handled automatically. bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); // We can get here in the case when we have a binary operation on a non-legal // type and the target independent selector doesn't know how to handle it. @@ -1594,7 +1595,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) { if (Ret->getNumOperands() > 0) { SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ValLocs; @@ -1641,7 +1642,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) { RetRegs.push_back(VA.getLocReg()); unsigned SrcReg = Reg + VA.getValNo(); - EVT RVEVT = TLI.getValueType(RV->getType()); + EVT RVEVT = TLI.getValueType(DL, RV->getType()); if (!RVEVT.isSimple()) return false; MVT RVVT = RVEVT.getSimpleVT(); @@ -1769,8 +1770,8 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) { // Attempt to fast-select an integer truncate instruction. bool PPCFastISel::SelectTrunc(const Instruction *I) { Value *Src = I->getOperand(0); - EVT SrcVT = TLI.getValueType(Src->getType(), true); - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16) return false; @@ -1806,8 +1807,8 @@ bool PPCFastISel::SelectIntExt(const Instruction *I) { if (!SrcReg) return false; EVT SrcEVT, DestEVT; - SrcEVT = TLI.getValueType(SrcTy, true); - DestEVT = TLI.getValueType(DestTy, true); + SrcEVT = TLI.getValueType(DL, SrcTy, true); + DestEVT = TLI.getValueType(DL, DestTy, true); if (!SrcEVT.isSimple()) return false; if (!DestEVT.isSimple()) @@ -1979,7 +1980,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { // on the "if" path here. if (CModel == CodeModel::Large || (GV->getType()->getElementType()->isFunctionTy() && - (GV->isDeclaration() || GV->isWeakForLinker())) || + !GV->isStrongDefinitionForLinker()) || GV->isDeclaration() || GV->hasCommonLinkage() || GV->hasAvailableExternallyLinkage()) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), @@ -2127,7 +2128,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT, // Materialize a constant into a register, and return the register // number (or zero if we failed to handle it). unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) { - EVT CEVT = TLI.getValueType(C->getType(), true); + EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. if (!CEVT.isSimple()) return 0; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index b4008e4..87229d8 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -306,9 +306,10 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); DebugLoc dl = MI->getDebugLoc(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned UsedRegMask = 0; for (unsigned i = 0; i != 32; ++i) - if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i])) + if (MRI.isPhysRegModified(VRRegNo[i])) UsedRegMask |= 1 << (31-i); // Live in and live out values already must be in the mask, so don't bother @@ -1158,9 +1159,11 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, } } -void -PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *) const { +void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + const PPCRegisterInfo *RegInfo = static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); @@ -1168,8 +1171,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); unsigned LR = RegInfo->getRARegister(); FI->setMustSaveLR(MustSaveLR(MF, LR)); - MachineRegisterInfo &MRI = MF.getRegInfo(); - MRI.setPhysRegUnused(LR); + SavedRegs.reset(LR); // Save R31 if necessary int FPSI = FI->getFramePointerSaveIndex(); @@ -1214,9 +1216,9 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the // function uses CR 2, 3, or 4. if (!isPPC64 && !isDarwinABI && - (MRI.isPhysRegUsed(PPC::CR2) || - MRI.isPhysRegUsed(PPC::CR3) || - MRI.isPhysRegUsed(PPC::CR4))) { + (SavedRegs.test(PPC::CR2) || + SavedRegs.test(PPC::CR3) || + SavedRegs.test(PPC::CR4))) { int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); FI->setCRSpillFrameIndex(FrameIdx); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h index 28d074e..d6a389b 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h @@ -45,8 +45,8 @@ public: bool needsFP(const MachineFunction &MF) const; void replaceFPWithRealFP(MachineFunction &MF) const; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = nullptr) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS = nullptr) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS = nullptr) const override; void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index c85c261..01a3acb 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -102,7 +102,8 @@ namespace { /// getSmallIPtrImm - Return a target constant of pointer type. inline SDValue getSmallIPtrImm(unsigned Imm, SDLoc dl) { - return CurDAG->getTargetConstant(Imm, dl, PPCLowering->getPointerTy()); + return CurDAG->getTargetConstant( + Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout())); } /// isRotateAndMask - Returns true if Mask and Shift can be folded into a @@ -313,7 +314,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { const Module *M = MF->getFunction()->getParent(); DebugLoc dl; - if (PPCLowering->getPointerTy() == MVT::i32) { + if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) { if (PPCSubTarget->isTargetELF()) { GlobalBaseReg = PPC::R30; if (M->getPICLevel() == PICLevel::Small) { @@ -342,7 +343,8 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { } } return CurDAG->getRegister(GlobalBaseReg, - PPCLowering->getPointerTy()).getNode(); + PPCLowering->getPointerTy(CurDAG->getDataLayout())) + .getNode(); } /// isIntS16Immediate - This method tests to see if the node is either a 32-bit @@ -2205,7 +2207,8 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { SDLoc dl(N); unsigned Imm; ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); - EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = + CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); if (!PPCSubTarget->useCRBits() && @@ -2468,10 +2471,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Offset, Base, Chain }; - return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl, - LD->getValueType(0), - PPCLowering->getPointerTy(), - MVT::Other, Ops)); + return transferMemOperands( + N, CurDAG->getMachineNode( + Opcode, dl, LD->getValueType(0), + PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, + Ops)); } else { unsigned Opcode; bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; @@ -2506,10 +2510,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Base, Offset, Chain }; - return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl, - LD->getValueType(0), - PPCLowering->getPointerTy(), - MVT::Other, Ops)); + return transferMemOperands( + N, CurDAG->getMachineNode( + Opcode, dl, LD->getValueType(0), + PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, + Ops)); } } @@ -2662,7 +2667,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { } case ISD::SELECT_CC: { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); - EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = + CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); // If this is a select of i1 operands, we'll pattern match it. @@ -2901,7 +2907,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { const GlobalValue *GValue = G->getGlobal(); if ((GValue->getType()->getElementType()->isFunctionTy() && - (GValue->isDeclaration() || GValue->isWeakForLinker())) || + !GValue->isStrongDefinitionForLinker()) || GValue->isDeclaration() || GValue->hasCommonLinkage() || GValue->hasAvailableExternallyLinkage()) return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl, @@ -2915,7 +2921,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { // Generate a PIC-safe GOT reference. assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() && "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); - return CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(), MVT::i32); + return CurDAG->SelectNodeTo( + N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(CurDAG->getDataLayout()), + MVT::i32); } case PPCISD::VADD_SPLAT: { // This expands into one of three sequences, depending on whether @@ -3398,9 +3406,8 @@ void PPCDAGToDAGISel::PeepholeCROps() { bool IsModified; do { IsModified = false; - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { - MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I); + for (SDNode &Node : CurDAG->allnodes()) { + MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); if (!MachineNode || MachineNode->use_empty()) continue; SDNode *ResNode = MachineNode; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 594472b..0ed9b05 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -952,7 +952,8 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. -unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { +unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty, + const DataLayout &DL) const { // Darwin passes everything on 4 byte boundary. if (Subtarget.isDarwin()) return 4; @@ -1055,7 +1056,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; } -EVT PPCTargetLowering::getSetCCResultType(LLVMContext &C, EVT VT) const { +EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, + EVT VT) const { if (!VT.isVector()) return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; @@ -1101,7 +1103,7 @@ static bool isConstantOrUndef(int Op, int Val) { /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { - bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian(); + bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; @@ -1132,7 +1134,7 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { - bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian(); + bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; @@ -1174,7 +1176,7 @@ bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, if (!Subtarget.hasP8Vector()) return false; - bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian(); + bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; @@ -1237,7 +1239,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, /// the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG) { - if (DAG.getTarget().getDataLayout()->isLittleEndian()) { + if (DAG.getDataLayout().isLittleEndian()) { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 0, 0); else if (ShuffleKind == 2) // swapped @@ -1262,7 +1264,7 @@ bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, /// the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG) { - if (DAG.getTarget().getDataLayout()->isLittleEndian()) { + if (DAG.getDataLayout().isLittleEndian()) { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 8, 8); else if (ShuffleKind == 2) // swapped @@ -1352,7 +1354,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, */ bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG) { - if (DAG.getTarget().getDataLayout()->isLittleEndian()) { + if (DAG.getDataLayout().isLittleEndian()) { unsigned indexOffset = CheckEven ? 4 : 0; if (ShuffleKind == 1) // Unary return isVMerge(N, indexOffset, 0); @@ -1399,7 +1401,7 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, if (ShiftAmt < i) return -1; ShiftAmt -= i; - bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian(); + bool isLE = DAG.getDataLayout().isLittleEndian(); if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { // Check the rest of the elements to see if they are consecutive. @@ -1456,7 +1458,7 @@ unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); assert(isSplatShuffleMask(SVOp, EltSize)); - if (DAG.getTarget().getDataLayout()->isLittleEndian()) + if (DAG.getDataLayout().isLittleEndian()) return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); else return SVOp->getMaskElt(0) / EltSize; @@ -1796,7 +1798,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, } } - Disp = DAG.getTargetConstant(0, dl, getPointerTy()); + Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout())); if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); @@ -2084,7 +2086,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool is64bit = Subtarget.isPPC64(); const Module *M = DAG.getMachineFunction().getFunction()->getParent(); PICLevel::Level picLevel = M->getPICLevel(); @@ -2270,7 +2272,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const { SDNode *Node = Op.getNode(); EVT VT = Node->getValueType(0); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue InChain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); @@ -2399,11 +2401,9 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SDValue Nest = Op.getOperand(3); // 'nest' parameter value SDLoc dl(Op); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); - Type *IntPtrTy = - DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType( - *DAG.getContext()); + Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -2440,7 +2440,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), @@ -2476,8 +2476,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); - - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), PtrVT); @@ -2797,7 +2796,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); @@ -3023,7 +3022,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( assert(!(CallConv == CallingConv::Fast && isVarArg) && "fastcc not supported on varargs functions"); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); @@ -3059,12 +3058,16 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( unsigned NumBytes = LinkageSize; unsigned AvailableFPRs = Num_FPR_Regs; unsigned AvailableVRs = Num_VR_Regs; - for (unsigned i = 0, e = Ins.size(); i != e; ++i) + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + if (Ins[i].Flags.isNest()) + continue; + if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, PtrByteSize, LinkageSize, ParamAreaSize, NumBytes, AvailableFPRs, AvailableVRs, Subtarget.hasQPX())) HasParameterArea = true; + } // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, @@ -3216,6 +3219,17 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( case MVT::i1: case MVT::i32: case MVT::i64: + if (Flags.isNest()) { + // The 'nest' parameter, if any, is passed in R11. + unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); + + if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) + ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); + + break; + } + // These can be scalar arguments or elements of an integer array type // passed directly. Clang may use those instead of "byval" aggregate // types to avoid forcing arguments to memory unnecessarily. @@ -3425,7 +3439,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && @@ -3845,7 +3859,8 @@ static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { return nullptr; // Top 6 bits have to be sext of immediate. return DAG.getConstant((int)C->getZExtValue() >> 2, SDLoc(Op), - DAG.getTargetLoweringInfo().getPointerTy()).getNode(); + DAG.getTargetLoweringInfo().getPointerTy( + DAG.getDataLayout())).getNode(); } namespace { @@ -3991,7 +4006,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, bool isVector, SmallVectorImpl<SDValue> &MemOpChains, SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, SDLoc dl) { - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); if (!isTailCall) { if (isVector) { SDValue StackPtr; @@ -4053,7 +4068,7 @@ static bool isFunctionGlobalAddress(SDValue Callee) { static unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff, - bool isTailCall, bool IsPatchPoint, + bool isTailCall, bool IsPatchPoint, bool hasNest, SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass, SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, ImmutableCallSite *CS, const PPCSubtarget &Subtarget) { @@ -4062,7 +4077,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, bool isSVR4ABI = Subtarget.isSVR4ABI(); bool isELFv2ABI = Subtarget.isELFv2ABI(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); NodeTys.push_back(MVT::Other); // Returns a chain NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. @@ -4084,8 +4099,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, if ((DAG.getTarget().getRelocationModel() != Reloc::Static && (Subtarget.getTargetTriple().isMacOSX() && Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && - (G->getGlobal()->isDeclaration() || - G->getGlobal()->isWeakForLinker())) || + !G->getGlobal()->isStrongDefinitionForLinker()) || (Subtarget.isTargetELF() && !isPPC64 && !G->getGlobal()->hasLocalLinkage() && DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { @@ -4196,11 +4210,15 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, Chain = TOCVal.getValue(0); InFlag = TOCVal.getValue(1); - SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, - InFlag); + // If the function call has an explicit 'nest' parameter, it takes the + // place of the environment pointer. + if (!hasNest) { + SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, + InFlag); - Chain = EnvVal.getValue(0); - InFlag = EnvVal.getValue(1); + Chain = EnvVal.getValue(0); + InFlag = EnvVal.getValue(1); + } MTCTROps[0] = Chain; MTCTROps[1] = LoadFuncPtr; @@ -4218,7 +4236,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, CallOpc = PPCISD::BCTRL; Callee.setNode(nullptr); // Add use of X11 (holding environment pointer) - if (isSVR4ABI && isPPC64 && !isELFv2ABI) + if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest) Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); // Add CTR register as callee so a bctr can be emitted later. if (isTailCall) @@ -4254,8 +4272,7 @@ static bool isLocalCall(const SDValue &Callee) { if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - return !G->getGlobal()->isDeclaration() && - !G->getGlobal()->isWeakForLinker(); + return G->getGlobal()->isStrongDefinitionForLinker(); return false; } @@ -4308,7 +4325,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, SDValue PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall, bool isVarArg, bool IsPatchPoint, - SelectionDAG &DAG, + bool hasNest, SelectionDAG &DAG, SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag, SDValue Chain, @@ -4321,8 +4338,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, std::vector<EVT> NodeTys; SmallVector<SDValue, 8> Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, - SPDiff, isTailCall, IsPatchPoint, RegsToPass, - Ops, NodeTys, CS, Subtarget); + SPDiff, isTailCall, IsPatchPoint, hasNest, + RegsToPass, Ops, NodeTys, CS, Subtarget); // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) @@ -4381,7 +4398,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, // allocated and an unnecessary move instruction being generated. CallOpc = PPCISD::BCTRL_LOAD_TOC; - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); @@ -4586,7 +4603,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, unsigned LocMemOffset = ByValVA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); - PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), + StackPtr, PtrOff); // Create a copy of the argument in the local area of the current // stack frame. @@ -4623,7 +4641,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, if (!isTailCall) { SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); - PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), + StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), @@ -4664,7 +4683,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, false, TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, + return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, + /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CS); } @@ -4703,8 +4723,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, bool isELFv2ABI = Subtarget.isELFv2ABI(); bool isLittleEndian = Subtarget.isLittleEndian(); unsigned NumOps = Outs.size(); + bool hasNest = false; - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); unsigned PtrByteSize = 8; MachineFunction &MF = DAG.getMachineFunction(); @@ -4758,6 +4779,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, EVT ArgVT = Outs[i].VT; EVT OrigVT = Outs[i].ArgVT; + if (Flags.isNest()) + continue; + if (CallConv == CallingConv::Fast) { if (Flags.isByVal()) NumGPRsUsed += (Flags.getByValSize()+7)/8; @@ -5021,6 +5045,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, case MVT::i1: case MVT::i32: case MVT::i64: + if (Flags.isNest()) { + // The 'nest' parameter, if any, is passed in R11. + RegsToPass.push_back(std::make_pair(PPC::X11, Arg)); + hasNest = true; + break; + } + // These can be scalar arguments or elements of an integer array type // passed directly. Clang may use those instead of "byval" aggregate // types to avoid forcing arguments to memory unnecessarily. @@ -5302,9 +5333,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, FPOp, true, TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, - RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, - NumBytes, Ins, InVals, CS); + return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, + hasNest, DAG, RegsToPass, InFlag, Chain, CallSeqStart, + Callee, SPDiff, NumBytes, Ins, InVals, CS); } SDValue @@ -5320,7 +5351,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, unsigned NumOps = Outs.size(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; unsigned PtrByteSize = isPPC64 ? 8 : 4; @@ -5693,7 +5724,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, FPOp, true, TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, + return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, + /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CS); } @@ -5764,7 +5796,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, SDLoc dl(Op); // Get the corect type for pointers. - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); // Construct the stack pointer operand. bool isPPC64 = Subtarget.isPPC64(); @@ -5794,7 +5826,7 @@ SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool isPPC64 = Subtarget.isPPC64(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); // Get current frame pointer save index. The users of this index will be // primarily DYNALLOC instructions. @@ -5817,7 +5849,7 @@ SDValue PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool isPPC64 = Subtarget.isPPC64(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); // Get current frame pointer save index. The users of this index will be // primarily DYNALLOC instructions. @@ -5845,7 +5877,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDLoc dl(Op); // Get the corect type for pointers. - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); // Negate the size. SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, DAG.getConstant(0, dl, PtrVT), Size); @@ -5888,8 +5920,9 @@ SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue BasePtr = LD->getBasePtr(); MachineMemOperand *MMO = LD->getMemOperand(); - SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain, - BasePtr, MVT::i8, MMO); + SDValue NewLD = + DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain, + BasePtr, MVT::i8, MMO); SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; @@ -5913,7 +5946,8 @@ SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDValue Value = ST->getValue(); MachineMemOperand *MMO = ST->getMemOperand(); - Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value); + Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()), + Value); return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); } @@ -6374,7 +6408,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SINT.getOpcode() == ISD::ZERO_EXTEND)) && SINT.getOperand(0).getValueType() == MVT::i32) { MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); @@ -6419,7 +6453,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, // then lfd it and fcfid it. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); SDValue Ld; if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { @@ -6506,7 +6540,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); EVT VT = Op.getValueType(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); // Save FP Control Word to register EVT NodeTys[] = { @@ -6727,7 +6761,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); assert(BVN->getNumOperands() == 4 && @@ -6760,9 +6794,9 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } Constant *CP = ConstantVector::get(CV); - SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(), - 16 /* alignment */); - + SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), + 16 /* alignment */); + SmallVector<SDValue, 2> Ops; Ops.push_back(DAG.getEntryNode()); Ops.push_back(CPIdx); @@ -7453,7 +7487,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, // Create a stack slot that is 16-byte aligned. MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); // Store the input value into Value#0 of the stack slot. @@ -7499,7 +7533,7 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue StoreChain = DAG.getEntryNode(); @@ -7651,9 +7685,9 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, SmallVector<SDValue, 8> Stores; for (unsigned Idx = 0; Idx < 4; ++Idx) { - SDValue Ex = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, - DAG.getConstant(Idx, dl, getVectorIdxTy())); + SDValue Ex = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, + DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout()))); SDValue Store; if (ScalarVT != ScalarMemVT) Store = @@ -7715,7 +7749,7 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SmallVector<SDValue, 2> Ops; @@ -7920,7 +7954,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, assert(N->getValueType(0) == MVT::i1 && "Unexpected result type for CTR decrement intrinsic"); - EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0)); + EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + N->getValueType(0)); SDVTList VTs = DAG.getVTList(SVT, MVT::Other); SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), N->getOperand(1)); @@ -8248,7 +8283,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned restoreDstReg = MRI.createVirtualRegister(RC); - MVT PVT = getPointerTy(); + MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); // For v = setjmp(buf), we generate @@ -8386,7 +8421,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); - MVT PVT = getPointerTy(); + MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); @@ -9032,6 +9067,19 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Target Optimization Hooks //===----------------------------------------------------------------------===// +static std::string getRecipOp(const char *Base, EVT VT) { + std::string RecipOp(Base); + if (VT.getScalarType() == MVT::f64) + RecipOp += "d"; + else + RecipOp += "f"; + + if (VT.isVector()) + RecipOp = "vec-" + RecipOp; + + return RecipOp; +} + SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps, @@ -9043,13 +9091,12 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { - // Convergence is quadratic, so we essentially double the number of digits - // correct after every iteration. For both FRE and FRSQRTE, the minimum - // architected relative accuracy is 2^-5. When hasRecipPrec(), this is - // 2^-14. IEEE float has 23 digits and double has 52 digits. - RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; - if (VT.getScalarType() == MVT::f64) - ++RefinementSteps; + TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; + std::string RecipOp = getRecipOp("sqrt", VT); + if (!Recips.isEnabled(RecipOp)) + return SDValue(); + + RefinementSteps = Recips.getRefinementSteps(RecipOp); UseOneConstNR = true; return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } @@ -9066,13 +9113,12 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { - // Convergence is quadratic, so we essentially double the number of digits - // correct after every iteration. For both FRE and FRSQRTE, the minimum - // architected relative accuracy is 2^-5. When hasRecipPrec(), this is - // 2^-14. IEEE float has 23 digits and double has 52 digits. - RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; - if (VT.getScalarType() == MVT::f64) - ++RefinementSteps; + TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; + std::string RecipOp = getRecipOp("div", VT); + if (!Recips.isEnabled(RecipOp)) + return SDValue(); + + RefinementSteps = Recips.getRefinementSteps(RecipOp); return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); } return SDValue(); @@ -9854,7 +9900,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, assert(N->getOpcode() == ISD::SIGN_EXTEND && "Invalid extension type"); - EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0)); + EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout()); SDValue ShiftCst = DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); return DAG.getNode(ISD::SRA, dl, N->getValueType(0), @@ -10145,9 +10191,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, EVT MemVT = LD->getMemoryVT(); Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty); + unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext()); - unsigned ScalarABIAlignment = getDataLayout()->getABITypeAlignment(STy); + unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy); if (LD->isUnindexed() && VT.isVector() && ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) && // P8 and later hardware should just use LOAD. @@ -10219,7 +10265,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, 2*MemVT.getStoreSize()-1); // Create the new base load. - SDValue LDXIntID = DAG.getTargetConstant(IntrLD, dl, getPointerTy()); + SDValue LDXIntID = + DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout())); SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; SDValue BaseLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, @@ -10243,7 +10290,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (!findConsecutiveLoad(LD, DAG)) --IncValue; - SDValue Increment = DAG.getConstant(IncValue, dl, getPointerTy()); + SDValue Increment = + DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout())); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); MachineMemOperand *ExtraMMO = @@ -10691,7 +10739,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { /// getConstraintType - Given a constraint, return the type of /// constraint it is for this target. PPCTargetLowering::ConstraintType -PPCTargetLowering::getConstraintType(const std::string &Constraint) const { +PPCTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; @@ -10776,7 +10824,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight( std::pair<unsigned, const TargetRegisterClass *> PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, + StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC RS6000 Constraint Letters @@ -10923,8 +10971,8 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, // isLegalAddressingMode - Return true if the addressing mode represented // by AM is legal for this target, for a load/store of the specified type. -bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // PPC does not allow r+i addressing modes for vectors! if (Ty->isVectorTy() && AM.BaseOffs != 0) @@ -10977,22 +11025,22 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); FuncInfo->setLRStoreRequired(); bool isPPC64 = Subtarget.isPPC64(); + auto PtrVT = getPointerTy(MF.getDataLayout()); if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl, isPPC64 ? MVT::i64 : MVT::i32); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - DAG.getNode(ISD::ADD, dl, getPointerTy(), - FrameAddr, Offset), + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), MachinePointerInfo(), false, false, false, 0); } // Just load the return address off the stack. SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, MachinePointerInfo(), false, false, false, 0); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, + MachinePointerInfo(), false, false, false, 0); } SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, @@ -11000,13 +11048,13 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SDLoc dl(Op); unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - bool isPPC64 = PtrVT == MVT::i64; - MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); + bool isPPC64 = PtrVT == MVT::i64; + // Naked functions never have a frame pointer, and so we use r1. For all // other functions, this decision must be delayed until during PEI. unsigned FrameReg; @@ -11026,8 +11074,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. -unsigned PPCTargetLowering::getRegisterByName(const char* RegName, - EVT VT) const { +unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const { bool isPPC64 = Subtarget.isPPC64(); bool isDarwinABI = Subtarget.isDarwinABI(); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h index 02242b5..6e13533 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -423,7 +423,9 @@ namespace llvm { /// DAG node. const char *getTargetNodeName(unsigned Opcode) const override; - MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i32; + } bool isCheapToSpeculateCttz() const override { return true; @@ -434,7 +436,8 @@ namespace llvm { } /// getSetCCResultType - Return the ISD::SETCC ValueType - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; /// Return true if target always beneficiates from combining into FMA for a /// given value type. This must typically return false on targets where FMA @@ -487,7 +490,8 @@ namespace llvm { SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector<SDNode *> *Created) const override; - unsigned getRegisterByName(const char* RegName, EVT VT) const override; + unsigned getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const override; void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, @@ -519,8 +523,7 @@ namespace llvm { MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, MachineBasicBlock *MBB) const; - ConstraintType - getConstraintType(const std::string &Constraint) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. @@ -529,13 +532,13 @@ namespace llvm { std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. - unsigned getByValTypeAlignment(Type *Ty) const override; + unsigned getByValTypeAlignment(Type *Ty, + const DataLayout &DL) const override; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. @@ -544,8 +547,8 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { + unsigned + getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "es") return InlineAsm::Constraint_es; else if (ConstraintCode == "o") @@ -561,8 +564,8 @@ namespace llvm { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, - unsigned AS) const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS) const override; /// isLegalICmpImmediate - Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can @@ -745,7 +748,7 @@ namespace llvm { SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall, - bool isVarArg, bool IsPatchPoint, + bool isVarArg, bool IsPatchPoint, bool hasNest, SelectionDAG &DAG, SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 696a838..bf6e402 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -57,6 +57,10 @@ static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), cl::Hidden); +static cl::opt<bool> +UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, + cl::desc("Use the old (incorrect) instruction latency calculation")); + // Pin the vtable to this file. void PPCInstrInfo::anchor() {} @@ -103,6 +107,35 @@ PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, return new ScoreboardHazardRecognizer(II, DAG); } +unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost) const { + if (!ItinData || UseOldLatencyCalc) + return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost); + + // The default implementation of getInstrLatency calls getStageLatency, but + // getStageLatency does not do the right thing for us. While we have + // itinerary, most cores are fully pipelined, and so the itineraries only + // express the first part of the pipeline, not every stage. Instead, we need + // to use the listed output operand cycle number (using operand 0 here, which + // is an output). + + unsigned Latency = 1; + unsigned DefClass = MI->getDesc().getSchedClass(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || MO.isImplicit()) + continue; + + int Cycle = ItinData->getOperandCycle(DefClass, i); + if (Cycle < 0) + continue; + + Latency = std::max(Latency, (unsigned) Cycle); + } + + return Latency; +} int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h index e2d6346..40badae 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -95,6 +95,10 @@ public: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override; + unsigned getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost = nullptr) const override; + int getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, const MachineInstr *UseMI, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 43ba499..20c95fe 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -989,6 +989,18 @@ def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B), def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B), (XVDIVDP $A, $B)>; +// Reciprocal estimate +def : Pat<(int_ppc_vsx_xvresp v4f32:$A), + (XVRESP $A)>; +def : Pat<(int_ppc_vsx_xvredp v2f64:$A), + (XVREDP $A)>; + +// Recip. square root estimate +def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A), + (XVRSQRTESP $A)>; +def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A), + (XVRSQRTEDP $A)>; + } // AddedComplexity } // HasVSX @@ -1013,6 +1025,9 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. v4i32:$XB)))]>; } // isCommutable + def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B), + (XXLEQV $A, $B)>; + def XXLORC : XX3Form<60, 170, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxlorc $XT, $XA, $XB", IIC_VecGeneral, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 656376c..2b09b2f 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -165,8 +165,7 @@ void PPCRegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const { BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); - const PPCFrameLowering *PPCFI = - static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering()); + const PPCFrameLowering *TFI = getFrameLowering(MF); // The ZERO register is not really a register, but the representation of r0 // when used in instructions that treat r0 as the constant 0. @@ -209,7 +208,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::X1); Reserved.set(PPC::X13); - if (PPCFI->needsFP(MF)) + if (TFI->needsFP(MF)) Reserved.set(PPC::X31); if (hasBasePointer(MF)) @@ -230,7 +229,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { } } - if (PPCFI->needsFP(MF)) + if (TFI->needsFP(MF)) Reserved.set(PPC::R31); if (hasBasePointer(MF)) { @@ -256,8 +255,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); - const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + const PPCFrameLowering *TFI = getFrameLowering(MF); const unsigned DefaultSafety = 1; switch (RC->getID()) { @@ -341,7 +339,8 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { unsigned FrameSize = MFI->getStackSize(); // Get stack alignments. - unsigned TargetAlign = Subtarget.getFrameLowering()->getStackAlignment(); + const PPCFrameLowering *TFI = getFrameLowering(MF); + unsigned TargetAlign = TFI->getStackAlignment(); unsigned MaxAlign = MFI->getMaxAlignment(); assert((maxCallFrameSize & (MaxAlign-1)) == 0 && "Maximum call-frame size not sufficiently aligned"); @@ -864,8 +863,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); - const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + const PPCFrameLowering *TFI = getFrameLowering(MF); if (!TM.isPPC64()) return TFI->hasFP(MF) ? PPC::R31 : PPC::R1; @@ -908,10 +906,10 @@ bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const { } bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const { - const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); + const PPCFrameLowering *TFI = getFrameLowering(MF); const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); - unsigned StackAlign = Subtarget.getFrameLowering()->getStackAlignment(); + unsigned StackAlign = TFI->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->hasFnAttribute(Attribute::StackAlignment)); @@ -946,11 +944,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { MachineBasicBlock &MBB = *MI->getParent(); MachineFunction &MF = *MBB.getParent(); - const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); - const PPCFrameLowering *PPCFI = - static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering()); - unsigned StackEst = - PPCFI->determineFrameLayout(MF, false, true); + const PPCFrameLowering *TFI = getFrameLowering(MF); + unsigned StackEst = TFI->determineFrameLayout(MF, false, true); // If we likely don't need a stack frame, then we probably don't need a // virtual base register either. @@ -1034,4 +1029,3 @@ bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, MI->getOpcode() == TargetOpcode::PATCHPOINT || (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0)); } - diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td index 635d154..267f567 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td @@ -315,6 +315,10 @@ def P7Itineraries : ProcessorItineraries< P7_DU3, P7_DU4], 0>, InstrStage<1, [P7_VS1, P7_VS2]>], [5, 1, 1]>, + InstrItinData<IIC_FPAddSub , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [5, 1, 1]>, InstrItinData<IIC_FPCompare , [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>, InstrStage<1, [P7_VS1, P7_VS2]>], diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td index 020739b..69e6d05 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP8.td @@ -323,6 +323,10 @@ def P8Itineraries : ProcessorItineraries< P8_DU4, P8_DU5, P8_DU6], 0>, InstrStage<1, [P8_FPU1, P8_FPU2]>], [5, 1, 1]>, + InstrItinData<IIC_FPAddSub , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_FPU1, P8_FPU2]>], + [5, 1, 1]>, InstrItinData<IIC_FPCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6], 0>, InstrStage<1, [P8_FPU1, P8_FPU2]>], diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp deleted file mode 100644 index dc16742..0000000 --- a/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp +++ /dev/null @@ -1,22 +0,0 @@ -//===-- PPCSelectionDAGInfo.cpp - PowerPC SelectionDAG Info ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the PPCSelectionDAGInfo class. -// -//===----------------------------------------------------------------------===// - -#include "PPCTargetMachine.h" -using namespace llvm; - -#define DEBUG_TYPE "powerpc-selectiondag-info" - -PPCSelectionDAGInfo::PPCSelectionDAGInfo(const DataLayout *DL) - : TargetSelectionDAGInfo(DL) {} - -PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h deleted file mode 100644 index 2c1378d..0000000 --- a/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- PPCSelectionDAGInfo.h - PowerPC SelectionDAG Info -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the PowerPC subclass for TargetSelectionDAGInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H -#define LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class PPCTargetMachine; - -class PPCSelectionDAGInfo : public TargetSelectionDAGInfo { -public: - explicit PPCSelectionDAGInfo(const DataLayout *DL); - ~PPCSelectionDAGInfo(); -}; - -} - -#endif diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index cf603fe..58dacca 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -53,7 +53,7 @@ PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU, IsPPC64(TargetTriple.getArch() == Triple::ppc64 || TargetTriple.getArch() == Triple::ppc64le), TM(TM), FrameLowering(initializeSubtargetDependencies(CPU, FS)), - InstrInfo(*this), TLInfo(TM, *this), TSInfo(TM.getDataLayout()) {} + InstrInfo(*this), TLInfo(TM, *this) {} void PPCSubtarget::initializeEnvironment() { StackAlignment = 16; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h index e9cc3d4..0616c1f 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -17,10 +17,10 @@ #include "PPCFrameLowering.h" #include "PPCISelLowering.h" #include "PPCInstrInfo.h" -#include "PPCSelectionDAGInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -129,7 +129,7 @@ protected: PPCFrameLowering FrameLowering; PPCInstrInfo InstrInfo; PPCTargetLowering TLInfo; - PPCSelectionDAGInfo TSInfo; + TargetSelectionDAGInfo TSInfo; public: /// This constructor initializes the data members to match that @@ -164,7 +164,7 @@ public: const PPCTargetLowering *getTargetLowering() const override { return &TLInfo; } - const PPCSelectionDAGInfo *getSelectionDAGInfo() const override { + const TargetSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } const PPCRegisterInfo *getRegisterInfo() const override { diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index 074bc87..1daf244 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -172,7 +172,26 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT, : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU, computeFSAdditions(FS, OL, TT), Options, RM, CM, OL), TLOF(createTLOF(getTargetTriple())), - TargetABI(computeTargetABI(TT, Options)) { + TargetABI(computeTargetABI(TT, Options)), + Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) { + + // For the estimates, convergence is quadratic, so we essentially double the + // number of digits correct after every iteration. For both FRE and FRSQRTE, + // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), + // this is 2^-14. IEEE float has 23 digits and double has 52 digits. + unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3, + RefinementSteps64 = RefinementSteps + 1; + + this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps); + this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps); + this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps); + this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps); + + this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64); + this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64); + this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64); + this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64); + initAsmInfo(); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h index 5c0f7e6..6496339 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h @@ -29,6 +29,8 @@ public: private: std::unique_ptr<TargetLoweringObjectFile> TLOF; PPCABI TargetABI; + PPCSubtarget Subtarget; + mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap; public: diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 25d563a..e21c2b7 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -317,7 +317,7 @@ unsigned PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) { // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && "Invalid Opcode"); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 35e7a14..368bef9 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -38,7 +38,8 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> { public: explicit PPCTTIImpl(const PPCTargetMachine *TM, Function &F) - : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. PPCTTIImpl(const PPCTTIImpl &Arg) @@ -46,18 +47,6 @@ public: PPCTTIImpl(PPCTTIImpl &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - PPCTTIImpl &operator=(const PPCTTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - PPCTTIImpl &operator=(PPCTTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } /// \name Scalar TTI Implementations /// @{ diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index f352fa6..58d3c3d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -136,6 +136,16 @@ protected: // source of the copy, it must still be live here. We can't use // interval testing for a physical register, so as long as we're // walking the MIs we may as well test liveness here. + // + // FIXME: There is a case that occurs in practice, like this: + // %vreg9<def> = COPY %F1; VSSRC:%vreg9 + // ... + // %vreg6<def> = COPY %vreg9; VSSRC:%vreg6,%vreg9 + // %vreg7<def> = COPY %vreg9; VSSRC:%vreg7,%vreg9 + // %vreg9<def,tied1> = XSMADDASP %vreg9<tied0>, %vreg1, %vreg4; VSSRC: + // %vreg6<def,tied1> = XSMADDASP %vreg6<tied0>, %vreg1, %vreg2; VSSRC: + // %vreg7<def,tied1> = XSMADDASP %vreg7<tied0>, %vreg1, %vreg3; VSSRC: + // which prevents an otherwise-profitable transformation. bool OtherUsers = false, KillsAddendSrc = false; for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI); J != JE; --J) { diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index e7ab71a..3fb1dcc 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -80,6 +80,7 @@ struct PPCVSXSwapEntry { unsigned int IsSwap : 1; unsigned int MentionsPhysVR : 1; unsigned int IsSwappable : 1; + unsigned int MentionsPartialVR : 1; unsigned int SpecialHandling : 3; unsigned int WebRejected : 1; unsigned int WillRemove : 1; @@ -91,7 +92,9 @@ enum SHValues { SH_INSERT, SH_NOSWAP_LD, SH_NOSWAP_ST, - SH_SPLAT + SH_SPLAT, + SH_XXPERMDI, + SH_COPYSCALAR }; struct PPCVSXSwapRemoval : public MachineFunctionPass { @@ -167,6 +170,21 @@ private: isRegInClass(Reg, &PPC::VRRCRegClass)); } + // Return true iff the given register is a partial vector register. + bool isScalarVecReg(unsigned Reg) { + return (isRegInClass(Reg, &PPC::VSFRCRegClass) || + isRegInClass(Reg, &PPC::VSSRCRegClass)); + } + + // Return true iff the given register mentions all or part of a + // vector register. Also sets Partial to true if the mention + // is for just the floating-point register overlap of the register. + bool isAnyVecReg(unsigned Reg, bool &Partial) { + if (isScalarVecReg(Reg)) + Partial = true; + return isScalarVecReg(Reg) || isVecReg(Reg); + } + public: // Main entry point for this pass. bool runOnMachineFunction(MachineFunction &MF) override { @@ -223,12 +241,13 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { for (MachineInstr &MI : MBB) { bool RelevantInstr = false; + bool Partial = false; for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (isVecReg(Reg)) { + if (isAnyVecReg(Reg, Partial)) { RelevantInstr = true; break; } @@ -250,8 +269,13 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { // Unless noted otherwise, an instruction is considered // safe for the optimization. There are a large number of // such true-SIMD instructions (all vector math, logical, - // select, compare, etc.). - SwapVector[VecIdx].IsSwappable = 1; + // select, compare, etc.). However, if the instruction + // mentions a partial vector register and does not have + // special handling defined, it is not swappable. + if (Partial) + SwapVector[VecIdx].MentionsPartialVR = 1; + else + SwapVector[VecIdx].IsSwappable = 1; break; case PPC::XXPERMDI: { // This is a swap if it is of the form XXPERMDI t, s, s, 2. @@ -269,25 +293,37 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { VecIdx); if (trueReg1 == trueReg2) SwapVector[VecIdx].IsSwap = 1; - } + else { + // We can still handle these if the two registers are not + // identical, by adjusting the form of the XXPERMDI. + SwapVector[VecIdx].IsSwappable = 1; + SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI; + } // This is a doubleword splat if it is of the form // XXPERMDI t, s, s, 0 or XXPERMDI t, s, s, 3. As above we // must look through chains of copy-likes to find the source // register. We turn off the marking for mention of a physical // register, because splatting it is safe; the optimization - // will not swap the value in the physical register. - else if (immed == 0 || immed == 3) { + // will not swap the value in the physical register. Whether + // or not the two input registers are identical, we can handle + // these by adjusting the form of the XXPERMDI. + } else if (immed == 0 || immed == 3) { + + SwapVector[VecIdx].IsSwappable = 1; + SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI; + unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(), VecIdx); unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(), VecIdx); - if (trueReg1 == trueReg2) { - SwapVector[VecIdx].IsSwappable = 1; + if (trueReg1 == trueReg2) SwapVector[VecIdx].MentionsPhysVR = 0; - } + + } else { + // We can still handle these by adjusting the form of the XXPERMDI. + SwapVector[VecIdx].IsSwappable = 1; + SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI; } - // Any other form of XXPERMDI is lane-sensitive and unsafe - // for the optimization. break; } case PPC::LVX: @@ -324,7 +360,32 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { if (isVecReg(MI.getOperand(0).getReg()) && isVecReg(MI.getOperand(1).getReg())) SwapVector[VecIdx].IsSwappable = 1; + // If we have a copy from one scalar floating-point register + // to another, we can accept this even if it is a physical + // register. The only way this gets involved is if it feeds + // a SUBREG_TO_REG, which is handled by introducing a swap. + else if (isScalarVecReg(MI.getOperand(0).getReg()) && + isScalarVecReg(MI.getOperand(1).getReg())) + SwapVector[VecIdx].IsSwappable = 1; + break; + case PPC::SUBREG_TO_REG: { + // These are fine provided they are moving between full vector + // register classes. If they are moving from a scalar + // floating-point class to a vector class, we can handle those + // as well, provided we introduce a swap. It is generally the + // case that we will introduce fewer swaps than we remove, but + // (FIXME) a cost model could be used. However, introduced + // swaps could potentially be CSEd, so this is not trivial. + if (isVecReg(MI.getOperand(0).getReg()) && + isVecReg(MI.getOperand(2).getReg())) + SwapVector[VecIdx].IsSwappable = 1; + else if (isVecReg(MI.getOperand(0).getReg()) && + isScalarVecReg(MI.getOperand(2).getReg())) { + SwapVector[VecIdx].IsSwappable = 1; + SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYSCALAR; + } break; + } case PPC::VSPLTB: case PPC::VSPLTH: case PPC::VSPLTW: @@ -425,6 +486,10 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { case PPC::VUPKLSW: case PPC::XXMRGHW: case PPC::XXMRGLW: + // XXSLDWI could be replaced by a general permute with one of three + // permute control vectors (for shift values 1, 2, 3). However, + // VPERM has a more restrictive register class. + case PPC::XXSLDWI: case PPC::XXSPLTW: break; } @@ -501,18 +566,20 @@ void PPCVSXSwapRemoval::formWebs() { DEBUG(MI->dump()); // It's sufficient to walk vector uses and join them to their unique - // definitions. In addition, check *all* vector register operands - // for physical regs. + // definitions. In addition, check full vector register operands + // for physical regs. We exclude partial-vector register operands + // because we can handle them if copied to a full vector. for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!isVecReg(Reg)) + if (!isVecReg(Reg) && !isScalarVecReg(Reg)) continue; if (!TargetRegisterInfo::isVirtualRegister(Reg)) { - SwapVector[EntryIdx].MentionsPhysVR = 1; + if (!(MI->isCopy() && isScalarVecReg(Reg))) + SwapVector[EntryIdx].MentionsPhysVR = 1; continue; } @@ -545,15 +612,21 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() { for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) { int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId); - // Reject webs containing mentions of physical registers, or containing - // operations that we don't know how to handle in a lane-permuted region. + // If representative is already rejected, don't waste further time. + if (SwapVector[Repr].WebRejected) + continue; + + // Reject webs containing mentions of physical or partial registers, or + // containing operations that we don't know how to handle in a lane- + // permuted region. if (SwapVector[EntryIdx].MentionsPhysVR || + SwapVector[EntryIdx].MentionsPartialVR || !(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) { SwapVector[Repr].WebRejected = 1; DEBUG(dbgs() << - format("Web %d rejected for physreg, subreg, or not swap[pable]\n", + format("Web %d rejected for physreg, partial reg, or not swap[pable]\n", Repr)); DEBUG(dbgs() << " in " << EntryIdx << ": "); DEBUG(SwapVector[EntryIdx].VSEMI->dump()); @@ -588,7 +661,7 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() { } } - // Reject webs than contain swapping stores that are fed by something + // Reject webs that contain swapping stores that are fed by something // other than a swap instruction. } else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) { MachineInstr *MI = SwapVector[EntryIdx].VSEMI; @@ -670,7 +743,8 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() { // The identified swap entry requires special handling to allow its // containing computation to be optimized. Perform that handling // here. -// FIXME: This code is to be phased in with subsequent patches. +// FIXME: Additional opportunities will be phased in with subsequent +// patches. void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) { switch (SwapVector[EntryIdx].SpecialHandling) { @@ -704,6 +778,91 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) { break; } + // For an XXPERMDI that isn't handled otherwise, we need to + // reverse the order of the operands. If the selector operand + // has a value of 0 or 3, we need to change it to 3 or 0, + // respectively. Otherwise we should leave it alone. (This + // is equivalent to reversing the two bits of the selector + // operand and complementing the result.) + case SHValues::SH_XXPERMDI: { + MachineInstr *MI = SwapVector[EntryIdx].VSEMI; + + DEBUG(dbgs() << "Changing XXPERMDI: "); + DEBUG(MI->dump()); + + unsigned Selector = MI->getOperand(3).getImm(); + if (Selector == 0 || Selector == 3) + Selector = 3 - Selector; + MI->getOperand(3).setImm(Selector); + + unsigned Reg1 = MI->getOperand(1).getReg(); + unsigned Reg2 = MI->getOperand(2).getReg(); + MI->getOperand(1).setReg(Reg2); + MI->getOperand(2).setReg(Reg1); + + DEBUG(dbgs() << " Into: "); + DEBUG(MI->dump()); + break; + } + + // For a copy from a scalar floating-point register to a vector + // register, removing swaps will leave the copied value in the + // wrong lane. Insert a swap following the copy to fix this. + case SHValues::SH_COPYSCALAR: { + MachineInstr *MI = SwapVector[EntryIdx].VSEMI; + + DEBUG(dbgs() << "Changing SUBREG_TO_REG: "); + DEBUG(MI->dump()); + + unsigned DstReg = MI->getOperand(0).getReg(); + const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); + unsigned NewVReg = MRI->createVirtualRegister(DstRC); + + MI->getOperand(0).setReg(NewVReg); + DEBUG(dbgs() << " Into: "); + DEBUG(MI->dump()); + + MachineBasicBlock::iterator InsertPoint = MI->getNextNode(); + + // Note that an XXPERMDI requires a VSRC, so if the SUBREG_TO_REG + // is copying to a VRRC, we need to be careful to avoid a register + // assignment problem. In this case we must copy from VRRC to VSRC + // prior to the swap, and from VSRC to VRRC following the swap. + // Coalescing will usually remove all this mess. + + if (DstRC == &PPC::VRRCRegClass) { + unsigned VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass); + unsigned VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass); + + BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(), + TII->get(PPC::COPY), VSRCTmp1) + .addReg(NewVReg); + DEBUG(MI->getNextNode()->dump()); + + BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(), + TII->get(PPC::XXPERMDI), VSRCTmp2) + .addReg(VSRCTmp1) + .addReg(VSRCTmp1) + .addImm(2); + DEBUG(MI->getNextNode()->getNextNode()->dump()); + + BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(), + TII->get(PPC::COPY), DstReg) + .addReg(VSRCTmp2); + DEBUG(MI->getNextNode()->getNextNode()->getNextNode()->dump()); + + } else { + + BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(), + TII->get(PPC::XXPERMDI), DstReg) + .addReg(NewVReg) + .addReg(NewVReg) + .addImm(2); + + DEBUG(MI->getNextNode()->dump()); + } + break; + } } } @@ -756,6 +915,8 @@ void PPCVSXSwapRemoval::dumpSwapVector() { DEBUG(dbgs() << "swap "); if (SwapVector[EntryIdx].MentionsPhysVR) DEBUG(dbgs() << "physreg "); + if (SwapVector[EntryIdx].MentionsPartialVR) + DEBUG(dbgs() << "partialreg "); if (SwapVector[EntryIdx].IsSwappable) { DEBUG(dbgs() << "swappable "); @@ -780,6 +941,12 @@ void PPCVSXSwapRemoval::dumpSwapVector() { case SH_SPLAT: DEBUG(dbgs() << "special:splat "); break; + case SH_XXPERMDI: + DEBUG(dbgs() << "special:xxpermdi "); + break; + case SH_COPYSCALAR: + DEBUG(dbgs() << "special:copyscalar "); + break; } } |