diff options
Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 333 |
1 files changed, 307 insertions, 26 deletions
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 254fea6..95efc11 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -14,24 +14,30 @@ #define DEBUG_TYPE "ppc-codegen" #include "PPC.h" -#include "PPCTargetMachine.h" #include "MCTargetDesc/PPCPredicates.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "PPCTargetMachine.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/GlobalValue.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; +namespace llvm { + void initializePPCDAGToDAGISelPass(PassRegistry&); +} + namespace { //===--------------------------------------------------------------------===// /// PPCDAGToDAGISel - PPC specific code to select PPC machine @@ -46,7 +52,9 @@ namespace { explicit PPCDAGToDAGISel(PPCTargetMachine &tm) : SelectionDAGISel(tm), TM(tm), PPCLowering(*TM.getTargetLowering()), - PPCSubTarget(*TM.getSubtargetImpl()) {} + PPCSubTarget(*TM.getSubtargetImpl()) { + initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF) { // Make sure we re-emit a set of the global base reg if necessary @@ -59,6 +67,8 @@ namespace { return true; } + virtual void PostprocessISelDAG(); + /// getI32Imm - Return a target constant with the specified value, of type /// i32. inline SDValue getI32Imm(unsigned Imm) { @@ -110,10 +120,10 @@ namespace { } /// SelectAddrImmOffs - Return true if the operand is valid for a preinc - /// immediate field. Because preinc imms have already been validated, just - /// accept it. + /// immediate field. Note that the operand at this point is already the + /// result of a prior SelectAddressRegImm call. bool SelectAddrImmOffs(SDValue N, SDValue &Out) const { - if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo || + if (N.getOpcode() == ISD::TargetConstant || N.getOpcode() == ISD::TargetGlobalAddress) { Out = N; return true; @@ -122,18 +132,6 @@ namespace { return false; } - /// SelectAddrIdxOffs - Return true if the operand is valid for a preinc - /// index field. Because preinc imms have already been validated, just - /// accept it. - bool SelectAddrIdxOffs(SDValue N, SDValue &Out) const { - if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo || - N.getOpcode() == ISD::TargetGlobalAddress) - return false; - - Out = N; - return true; - } - /// SelectAddrIdx - Given the specified addressed, check to see if it can be /// represented as an indexed [r+r] operation. Returns false if it can /// be represented by [r+imm], which are preferred. @@ -154,6 +152,12 @@ namespace { return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG); } + // Select an address into a single register. + bool SelectAddr(SDValue N, SDValue &Base) { + Base = N; + return true; + } + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. It is always correct to compute the value into /// a register. The case of adding a (possibly relocatable) constant to a @@ -1040,7 +1044,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { break; SDValue Offset = LD->getOffset(); - if (isa<ConstantSDNode>(Offset) || + if (Offset.getOpcode() == ISD::TargetConstant || Offset.getOpcode() == ISD::TargetGlobalAddress) { unsigned Opcode; @@ -1107,7 +1111,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); - SDValue Ops[] = { Offset, Base, Chain }; + SDValue Ops[] = { Base, Offset, Chain }; return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), PPCLowering.getPointerTy(), MVT::Other, Ops, 3); @@ -1268,11 +1272,277 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { Chain), 0); return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); } + case PPCISD::TOC_ENTRY: { + assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI"); + + // For medium and large code model, we generate two instructions as + // described below. Otherwise we allow SelectCodeCommon to handle this, + // selecting one of LDtoc, LDtocJTI, and LDtocCPT. + CodeModel::Model CModel = TM.getCodeModel(); + if (CModel != CodeModel::Medium && CModel != CodeModel::Large) + break; + + // The first source operand is a TargetGlobalAddress or a + // TargetJumpTable. If it is an externally defined symbol, a symbol + // with common linkage, a function address, or a jump table address, + // or if we are generating code for large code model, we generate: + // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>)) + // Otherwise we generate: + // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>) + SDValue GA = N->getOperand(0); + SDValue TOCbase = N->getOperand(1); + SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64, + TOCbase, GA); + + if (isa<JumpTableSDNode>(GA) || CModel == CodeModel::Large) + return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, + SDValue(Tmp, 0)); + + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { + const GlobalValue *GValue = G->getGlobal(); + const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); + const GlobalValue *RealGValue = GAlias ? + GAlias->resolveAliasedGlobal(false) : GValue; + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); + assert((GVar || isa<Function>(RealGValue)) && + "Unexpected global value subclass!"); + + // An external variable is one without an initializer. For these, + // for variables with common linkage, and for Functions, generate + // the LDtocL form. + if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || + RealGValue->hasAvailableExternallyLinkage()) + return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, + SDValue(Tmp, 0)); + } + + return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, + SDValue(Tmp, 0), GA); + } + case PPCISD::VADD_SPLAT: { + // This expands into one of three sequences, depending on whether + // the first operand is odd or even, positive or negative. + assert(isa<ConstantSDNode>(N->getOperand(0)) && + isa<ConstantSDNode>(N->getOperand(1)) && + "Invalid operand on VADD_SPLAT!"); + + int Elt = N->getConstantOperandVal(0); + int EltSize = N->getConstantOperandVal(1); + unsigned Opc1, Opc2, Opc3; + EVT VT; + + if (EltSize == 1) { + Opc1 = PPC::VSPLTISB; + Opc2 = PPC::VADDUBM; + Opc3 = PPC::VSUBUBM; + VT = MVT::v16i8; + } else if (EltSize == 2) { + Opc1 = PPC::VSPLTISH; + Opc2 = PPC::VADDUHM; + Opc3 = PPC::VSUBUHM; + VT = MVT::v8i16; + } else { + assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!"); + Opc1 = PPC::VSPLTISW; + Opc2 = PPC::VADDUWM; + Opc3 = PPC::VSUBUWM; + VT = MVT::v4i32; + } + + if ((Elt & 1) == 0) { + // Elt is even, in the range [-32,-18] + [16,30]. + // + // Convert: VADD_SPLAT elt, size + // Into: tmp = VSPLTIS[BHW] elt + // VADDU[BHW]M tmp, tmp + // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4 + SDValue EltVal = getI32Imm(Elt >> 1); + SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + SDValue TmpVal = SDValue(Tmp, 0); + return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal); + + } else if (Elt > 0) { + // Elt is odd and positive, in the range [17,31]. + // + // Convert: VADD_SPLAT elt, size + // Into: tmp1 = VSPLTIS[BHW] elt-16 + // tmp2 = VSPLTIS[BHW] -16 + // VSUBU[BHW]M tmp1, tmp2 + SDValue EltVal = getI32Imm(Elt - 16); + SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + EltVal = getI32Imm(-16); + SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0), + SDValue(Tmp2, 0)); + + } else { + // Elt is odd and negative, in the range [-31,-17]. + // + // Convert: VADD_SPLAT elt, size + // Into: tmp1 = VSPLTIS[BHW] elt+16 + // tmp2 = VSPLTIS[BHW] -16 + // VADDU[BHW]M tmp1, tmp2 + SDValue EltVal = getI32Imm(Elt + 16); + SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + EltVal = getI32Imm(-16); + SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0), + SDValue(Tmp2, 0)); + } + } } return SelectCode(N); } +/// PostProcessISelDAG - Perform some late peephole optimizations +/// on the DAG representation. +void PPCDAGToDAGISel::PostprocessISelDAG() { + + // Skip peepholes at -O0. + if (TM.getOptLevel() == CodeGenOpt::None) + return; + + // These optimizations are currently supported only for 64-bit SVR4. + if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64()) + return; + + SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); + ++Position; + + while (Position != CurDAG->allnodes_begin()) { + SDNode *N = --Position; + // Skip dead nodes and any non-machine opcodes. + if (N->use_empty() || !N->isMachineOpcode()) + continue; + + unsigned FirstOp; + unsigned StorageOpcode = N->getMachineOpcode(); + + switch (StorageOpcode) { + default: continue; + + case PPC::LBZ: + case PPC::LBZ8: + case PPC::LD: + case PPC::LFD: + case PPC::LFS: + case PPC::LHA: + case PPC::LHA8: + case PPC::LHZ: + case PPC::LHZ8: + case PPC::LWA: + case PPC::LWZ: + case PPC::LWZ8: + FirstOp = 0; + break; + + case PPC::STB: + case PPC::STB8: + case PPC::STD: + case PPC::STFD: + case PPC::STFS: + case PPC::STH: + case PPC::STH8: + case PPC::STW: + case PPC::STW8: + FirstOp = 1; + break; + } + + // If this is a load or store with a zero offset, we may be able to + // fold an add-immediate into the memory operation. + if (!isa<ConstantSDNode>(N->getOperand(FirstOp)) || + N->getConstantOperandVal(FirstOp) != 0) + continue; + + SDValue Base = N->getOperand(FirstOp + 1); + if (!Base.isMachineOpcode()) + continue; + + unsigned Flags = 0; + bool ReplaceFlags = true; + + // When the feeding operation is an add-immediate of some sort, + // determine whether we need to add relocation information to the + // target flags on the immediate operand when we fold it into the + // load instruction. + // + // For something like ADDItocL, the relocation information is + // inferred from the opcode; when we process it in the AsmPrinter, + // we add the necessary relocation there. A load, though, can receive + // relocation from various flavors of ADDIxxx, so we need to carry + // the relocation information in the target flags. + switch (Base.getMachineOpcode()) { + default: continue; + + case PPC::ADDI8: + case PPC::ADDI: + // In some cases (such as TLS) the relocation information + // is already in place on the operand, so copying the operand + // is sufficient. + ReplaceFlags = false; + // For these cases, the immediate may not be divisible by 4, in + // which case the fold is illegal for DS-form instructions. (The + // other cases provide aligned addresses and are always safe.) + if ((StorageOpcode == PPC::LWA || + StorageOpcode == PPC::LD || + StorageOpcode == PPC::STD) && + (!isa<ConstantSDNode>(Base.getOperand(1)) || + Base.getConstantOperandVal(1) % 4 != 0)) + continue; + break; + case PPC::ADDIdtprelL: + Flags = PPCII::MO_DTPREL16_LO; + break; + case PPC::ADDItlsldL: + Flags = PPCII::MO_TLSLD16_LO; + break; + case PPC::ADDItocL: + Flags = PPCII::MO_TOC16_LO; + break; + } + + // We found an opportunity. Reverse the operands from the add + // immediate and substitute them into the load or store. If + // needed, update the target flags for the immediate operand to + // reflect the necessary relocation information. + DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); + DEBUG(Base->dump(CurDAG)); + DEBUG(dbgs() << "\nN: "); + DEBUG(N->dump(CurDAG)); + DEBUG(dbgs() << "\n"); + + SDValue ImmOpnd = Base.getOperand(1); + + // If the relocation information isn't already present on the + // immediate operand, add it now. + if (ReplaceFlags) { + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { + DebugLoc dl = GA->getDebugLoc(); + const GlobalValue *GV = GA->getGlobal(); + ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags); + } else if (ConstantPoolSDNode *CP = + dyn_cast<ConstantPoolSDNode>(ImmOpnd)) { + const Constant *C = CP->getConstVal(); + ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, + CP->getAlignment(), + 0, Flags); + } + } + + if (FirstOp == 1) // Store + (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, + Base.getOperand(0), N->getOperand(3)); + else // Load + (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0), + N->getOperand(2)); + + // The add-immediate may now be dead, in which case remove it. + if (Base.getNode()->use_empty()) + CurDAG->RemoveDeadNode(Base.getNode()); + } +} /// createPPCISelDag - This pass converts a legalized DAG into a @@ -1282,3 +1552,14 @@ FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) { return new PPCDAGToDAGISel(TM); } +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection"; + PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, 0, + false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializePPCDAGToDAGISelPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce); +} + |