diff options
Diffstat (limited to 'lib/Target')
130 files changed, 5914 insertions, 2261 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 8bf1b7c..08dc07c 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -93,7 +93,6 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM); FunctionPass *createARMCodePrinterPass(raw_ostream &O, ARMBaseTargetMachine &TM, - CodeGenOpt::Level OptLevel, bool Verbose); FunctionPass *createARMCodeEmitterPass(ARMBaseTargetMachine &TM, MachineCodeEmitter &MCE); diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index 005eb7a..15c9ec1 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -305,7 +305,7 @@ namespace ARM_AM { /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3 /// Return -1 if none of the above apply. /// See ARM Reference Manual A6.3.2. - static inline int getT2SOImmValSplat (unsigned V) { + static inline int getT2SOImmValSplat(unsigned V) { unsigned u, Vs, Imm; // control = 0 if ((V & 0xffffff00) == 0) @@ -459,13 +459,13 @@ namespace ARM_AM { // // addrmode5 := reg +/- imm8*4 // - // The first operand is always a Reg. The third field encodes the operation - // in bit 8, the immediate in bits 0-7. + // The first operand is always a Reg. The second operand encodes the + // operation in bit 8 and the immediate in bits 0-7. // - // This can also be used for FP load/store multiple ops. The third field encodes - // writeback mode in bit 8, the number of registers (or 2 times the number of - // registers for DPR ops) in bits 0-7. In addition, bit 9-11 encodes one of the - // following two sub-modes: + // This is also used for FP load/store multiple ops. The second operand + // encodes the writeback mode in bit 8 and the number of registers (or 2 + // times the number of registers for DPR ops) in bits 0-7. In addition, + // bits 9-11 encode one of the following two sub-modes: // // IA - Increment after // DB - Decrement before @@ -496,7 +496,29 @@ namespace ARM_AM { static inline bool getAM5WBFlag(unsigned AM5Opc) { return ((AM5Opc >> 8) & 1); } - + + //===--------------------------------------------------------------------===// + // Addressing Mode #6 + //===--------------------------------------------------------------------===// + // + // This is used for NEON load / store instructions. + // + // addrmode6 := reg with optional writeback + // + // This is stored in three operands [regaddr, regupdate, opc]. The first is + // the address register. The second register holds the value of a post-access + // increment for writeback or reg0 if no writeback or if the writeback + // increment is the size of the memory access. The third operand encodes + // whether there is writeback to the address register. + + static inline unsigned getAM6Opc(bool WB = false) { + return (int)WB; + } + + static inline bool getAM6WBFlag(unsigned Mode) { + return Mode & 1; + } + } // end namespace ARM_AM } // end namespace llvm diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 8424c2e..f295761 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -1155,16 +1155,17 @@ void Emitter<CodeEmitter>::emitMiscBranchInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); // Handle jump tables. - if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd) { + if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd || + TID.Opcode == ARM::t2BR_JTr || TID.Opcode == ARM::t2BR_JTadd) { // First emit a ldr pc, [] instruction. emitDataProcessingInstruction(MI, ARM::PC); // Then emit the inline jump table. - unsigned JTIndex = (TID.Opcode == ARM::BR_JTr) + unsigned JTIndex = (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::t2BR_JTr) ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex(); emitInlineJumpTable(JTIndex); return; - } else if (TID.Opcode == ARM::BR_JTm) { + } else if (TID.Opcode == ARM::BR_JTm || TID.Opcode == ARM::t2BR_JTm) { // First emit a ldr pc, [] instruction. emitLoadStoreInstruction(MI, ARM::PC); diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index db723fe..9fedaa4 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -124,6 +124,7 @@ namespace { const TargetInstrInfo *TII; ARMFunctionInfo *AFI; bool isThumb; + bool isThumb2; public: static char ID; ARMConstantIslands() : MachineFunctionPass(&ID) {} @@ -213,6 +214,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) { TII = Fn.getTarget().getInstrInfo(); AFI = Fn.getInfo<ARMFunctionInfo>(); isThumb = AFI->isThumbFunction(); + isThumb2 = AFI->isThumb2Function(); HasFarJump = false; @@ -376,6 +378,9 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn, int UOpc = Opc; switch (Opc) { case ARM::tBR_JTr: + case ARM::t2BR_JTr: + case ARM::t2BR_JTm: + case ARM::t2BR_JTadd: // A Thumb table jump may involve padding; for the offsets to // be right, functions containing these must be 4-byte aligned. AFI->setAlign(2U); @@ -402,6 +407,16 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn, Bits = 11; Scale = 2; break; + case ARM::t2Bcc: + isCond = true; + UOpc = ARM::t2B; + Bits = 20; + Scale = 2; + break; + case ARM::t2B: + Bits = 24; + Scale = 2; + break; } // Record this immediate branch. @@ -447,21 +462,25 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn, Bits = 8; Scale = 4; // +-(offset_8*4) break; - case ARMII::AddrModeT1: + // addrmode6 has no immediate offset. + case ARMII::AddrModeT1_1: Bits = 5; // +offset_5 break; - case ARMII::AddrModeT2: + case ARMII::AddrModeT1_2: Bits = 5; Scale = 2; // +(offset_5*2) break; - case ARMII::AddrModeT4: + case ARMII::AddrModeT1_4: Bits = 5; Scale = 4; // +(offset_5*4) break; - case ARMII::AddrModeTs: + case ARMII::AddrModeT1_s: Bits = 8; Scale = 4; // +(offset_8*4) break; + case ARMII::AddrModeT2_pc: + Bits = 12; // +-offset_12 + break; } // Remember that this is a user of a CP entry. @@ -572,7 +591,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { // There doesn't seem to be meaningful DebugInfo available; this doesn't // correspond to anything in the source. BuildMI(OrigBB, DebugLoc::getUnknownLoc(), - TII->get(isThumb ? ARM::tB : ARM::B)).addMBB(NewBB); + TII->get(isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B)).addMBB(NewBB); NumSplit++; // Update the CFG. All succs of OrigBB are now succs of NewBB. @@ -716,7 +735,8 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) { MachineBasicBlock *Succ = *MBB->succ_begin(); MachineBasicBlock *Pred = *MBB->pred_begin(); MachineInstr *PredMI = &Pred->back(); - if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB) + if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB + || PredMI->getOpcode() == ARM::t2B) return PredMI->getOperand(0).getMBB() == Succ; return false; } @@ -748,7 +768,10 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB, // Thumb jump tables require padding. They should be at the end; // following unconditional branches are removed by AnalyzeBranch. MachineInstr *ThumbJTMI = NULL; - if (prior(MBB->end())->getOpcode() == ARM::tBR_JTr) + if ((prior(MBB->end())->getOpcode() == ARM::tBR_JTr) + || (prior(MBB->end())->getOpcode() == ARM::t2BR_JTr) + || (prior(MBB->end())->getOpcode() == ARM::t2BR_JTm) + || (prior(MBB->end())->getOpcode() == ARM::t2BR_JTadd)) ThumbJTMI = prior(MBB->end()); if (ThumbJTMI) { unsigned newMIOffset = GetOffsetOf(ThumbJTMI); @@ -839,7 +862,16 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) /// getUnconditionalBrDisp - Returns the maximum displacement that can fit in /// the specific unconditional branch instruction. static inline unsigned getUnconditionalBrDisp(int Opc) { - return (Opc == ARM::tB) ? ((1<<10)-1)*2 : ((1<<23)-1)*4; + switch (Opc) { + case ARM::tB: + return ((1<<10)-1)*2; + case ARM::t2B: + return ((1<<23)-1)*2; + default: + break; + } + + return ((1<<23)-1)*4; } /// AcceptWater - Small amount of common code factored out of the following. @@ -935,7 +967,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, // range, but if the preceding conditional branch is out of range, the // targets will be exchanged, and the altered branch may be out of // range, so the machinery has to know about it. - int UncondBr = isThumb ? ARM::tB : ARM::B; + int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B; BuildMI(UserMBB, DebugLoc::getUnknownLoc(), TII->get(UncondBr)).addMBB(*NewMBB); unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); @@ -1165,7 +1197,7 @@ bool ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *MBB = MI->getParent(); - assert(isThumb && "Expected a Thumb function!"); + assert(isThumb && !isThumb2 && "Expected a Thumb-1 function!"); // Use BL to implement far jump. Br.MaxDisp = (1 << 21) * 2; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 200371b..6485fc1 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -64,6 +64,8 @@ public: SDNode *Select(SDValue Op); virtual void InstructionSelect(); + bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A, + SDValue &B, SDValue &C); bool SelectAddrMode2(SDValue Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); bool SelectAddrMode2Offset(SDValue Op, SDValue N, @@ -74,9 +76,11 @@ public: SDValue &Offset, SDValue &Opc); bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base, SDValue &Offset); + bool SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update, + SDValue &Opc); bool SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset, - SDValue &Label); + SDValue &Label); bool SelectThumbAddrModeRR(SDValue Op, SDValue N, SDValue &Base, SDValue &Offset); @@ -92,20 +96,34 @@ public: bool SelectThumbAddrModeSP(SDValue Op, SDValue N, SDValue &Base, SDValue &OffImm); - bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A, - SDValue &B, SDValue &C); bool SelectT2ShifterOperandReg(SDValue Op, SDValue N, SDValue &BaseReg, SDValue &Opc); - + bool SelectT2AddrModeImm12(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffImm); + bool SelectT2AddrModeImm8(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffImm); + bool SelectT2AddrModeImm8Offset(SDValue Op, SDValue N, + SDValue &OffImm); + bool SelectT2AddrModeImm8s4(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffImm); + bool SelectT2AddrModeSoReg(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffReg, SDValue &ShImm); + // Include the pieces autogenerated from the target description. #include "ARMGenDAGISel.inc" private: - /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for - /// inline asm expressions. - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps); + /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for + /// ARM. + SDNode *SelectARMIndexedLoad(SDValue Op); + SDNode *SelectT2IndexedLoad(SDValue Op); + + + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps); }; } @@ -116,6 +134,30 @@ void ARMDAGToDAGISel::InstructionSelect() { CurDAG->RemoveDeadNodes(); } +bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op, + SDValue N, + SDValue &BaseReg, + SDValue &ShReg, + SDValue &Opc) { + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + + // Don't match base register only case. That is matched to a separate + // lower complexity pattern with explicit register operand. + if (ShOpcVal == ARM_AM::no_shift) return false; + + BaseReg = N.getOperand(0); + unsigned ShImmVal = 0; + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + ShReg = CurDAG->getRegister(0, MVT::i32); + ShImmVal = RHS->getZExtValue() & 31; + } else { + ShReg = N.getOperand(1); + } + Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), + MVT::i32); + return true; +} + bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { @@ -382,6 +424,16 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N, return true; } +bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N, + SDValue &Addr, SDValue &Update, + SDValue &Opc) { + Addr = N; + // The optional writeback is handled in ARMLoadStoreOpt. + Update = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32); + return true; +} + bool ARMDAGToDAGISel::SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset, SDValue &Label) { if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { @@ -519,30 +571,6 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N, return false; } -bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op, - SDValue N, - SDValue &BaseReg, - SDValue &ShReg, - SDValue &Opc) { - ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); - - // Don't match base register only case. That is matched to a separate - // lower complexity pattern with explicit register operand. - if (ShOpcVal == ARM_AM::no_shift) return false; - - BaseReg = N.getOperand(0); - unsigned ShImmVal = 0; - if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - ShReg = CurDAG->getRegister(0, MVT::i32); - ShImmVal = RHS->getZExtValue() & 31; - } else { - ShReg = N.getOperand(1); - } - Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), - MVT::i32); - return true; -} - bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue Op, SDValue N, SDValue &BaseReg, SDValue &Opc) { @@ -563,11 +591,250 @@ bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue Op, SDValue N, return false; } +bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue Op, SDValue N, + SDValue &Base, SDValue &OffImm) { + // Match simple R + imm12 operands. + if (N.getOpcode() != ISD::ADD) + return false; + + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits. + Base = N.getOperand(0); + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; + } + } + + return false; +} + +bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue Op, SDValue N, + SDValue &Base, SDValue &OffImm) { + if (N.getOpcode() == ISD::ADD) { + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (RHSC < 0 && RHSC > -0x100) { // 8 bits. + Base = N.getOperand(0); + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; + } + } + } else if (N.getOpcode() == ISD::SUB) { + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (RHSC >= 0 && RHSC < 0x100) { // 8 bits. + Base = N.getOperand(0); + OffImm = CurDAG->getTargetConstant(-RHSC, MVT::i32); + return true; + } + } + } + + return false; +} + +bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDValue Op, SDValue N, + SDValue &OffImm){ + unsigned Opcode = Op.getOpcode(); + ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) + ? cast<LoadSDNode>(Op)->getAddressingMode() + : cast<StoreSDNode>(Op)->getAddressingMode(); + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N)) { + int RHSC = (int)RHS->getZExtValue(); + if (RHSC >= 0 && RHSC < 0x100) { // 8 bits. + OffImm = (AM == ISD::PRE_INC) + ? CurDAG->getTargetConstant(RHSC, MVT::i32) + : CurDAG->getTargetConstant(-RHSC, MVT::i32); + return true; + } + } + + return false; +} + +bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDValue Op, SDValue N, + SDValue &Base, SDValue &OffImm) { + if (N.getOpcode() == ISD::ADD) { + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (((RHSC & 0x3) == 0) && (RHSC < 0 && RHSC > -0x400)) { // 8 bits. + Base = N.getOperand(0); + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; + } + } + } else if (N.getOpcode() == ISD::SUB) { + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) { // 8 bits. + Base = N.getOperand(0); + OffImm = CurDAG->getTargetConstant(-RHSC, MVT::i32); + return true; + } + } + } + + return false; +} + +bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue Op, SDValue N, + SDValue &Base, + SDValue &OffReg, SDValue &ShImm) { + // Base only. + if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) { + Base = N; + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } else if (N.getOpcode() == ARMISD::Wrapper) { + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::TargetConstantPool) + return false; // We want to select t2LDRpci instead. + } + OffReg = CurDAG->getRegister(0, MVT::i32); + ShImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + // Look for (R + R) or (R + (R << [1,2,3])). + unsigned ShAmt = 0; + Base = N.getOperand(0); + OffReg = N.getOperand(1); + + // Swap if it is ((R << c) + R). + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg); + if (ShOpcVal != ARM_AM::lsl) { + ShOpcVal = ARM_AM::getShiftOpcForNode(Base); + if (ShOpcVal == ARM_AM::lsl) + std::swap(Base, OffReg); + } + + if (ShOpcVal == ARM_AM::lsl) { + // Check to see if the RHS of the shift is a constant, if not, we can't fold + // it. + if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) { + ShAmt = Sh->getZExtValue(); + if (ShAmt >= 4) { + ShAmt = 0; + ShOpcVal = ARM_AM::no_shift; + } else + OffReg = OffReg.getOperand(0); + } else { + ShOpcVal = ARM_AM::no_shift; + } + } else if (SelectT2AddrModeImm12(Op, N, Base, ShImm) || + SelectT2AddrModeImm8 (Op, N, Base, ShImm)) + // Don't match if it's possible to match to one of the r +/- imm cases. + return false; + + ShImm = CurDAG->getTargetConstant(ShAmt, MVT::i32); + + return true; +} + +//===--------------------------------------------------------------------===// + /// getAL - Returns a ARMCC::AL immediate node. static inline SDValue getAL(SelectionDAG *CurDAG) { return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32); } +SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDValue Op) { + LoadSDNode *LD = cast<LoadSDNode>(Op); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM == ISD::UNINDEXED) + return NULL; + + MVT LoadedVT = LD->getMemoryVT(); + SDValue Offset, AMOpc; + bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + unsigned Opcode = 0; + bool Match = false; + if (LoadedVT == MVT::i32 && + SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST; + Match = true; + } else if (LoadedVT == MVT::i16 && + SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) + ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) + : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); + } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { + if (LD->getExtensionType() == ISD::SEXTLOAD) { + if (SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; + } + } else { + if (SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST; + } + } + } + + if (Match) { + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), Chain }; + return CurDAG->getTargetNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32, + MVT::Other, Ops, 6); + } + + return NULL; +} + +SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) { + LoadSDNode *LD = cast<LoadSDNode>(Op); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM == ISD::UNINDEXED) + return NULL; + + MVT LoadedVT = LD->getMemoryVT(); + bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; + SDValue Offset; + bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + unsigned Opcode = 0; + bool Match = false; + if (SelectT2AddrModeImm8Offset(Op, LD->getOffset(), Offset)) { + switch (LoadedVT.getSimpleVT()) { + case MVT::i32: + Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST; + break; + case MVT::i16: + if (isSExtLd) + Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST; + else + Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST; + break; + case MVT::i8: + case MVT::i1: + if (isSExtLd) + Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST; + else + Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST; + break; + default: + return NULL; + } + Match = true; + } + + if (Match) { + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[]= { Base, Offset, getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), Chain }; + return CurDAG->getTargetNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32, + MVT::Other, Ops, 5); + } + + return NULL; +} + SDNode *ARMDAGToDAGISel::Select(SDValue Op) { SDNode *N = Op.getNode(); @@ -698,47 +965,13 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { return CurDAG->getTargetNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5); } case ISD::LOAD: { - LoadSDNode *LD = cast<LoadSDNode>(Op); - ISD::MemIndexedMode AM = LD->getAddressingMode(); - MVT LoadedVT = LD->getMemoryVT(); - if (AM != ISD::UNINDEXED) { - SDValue Offset, AMOpc; - bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); - unsigned Opcode = 0; - bool Match = false; - if (LoadedVT == MVT::i32 && - SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { - Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST; - Match = true; - } else if (LoadedVT == MVT::i16 && - SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { - Match = true; - Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) - ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) - : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); - } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { - if (LD->getExtensionType() == ISD::SEXTLOAD) { - if (SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { - Match = true; - Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; - } - } else { - if (SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { - Match = true; - Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST; - } - } - } - - if (Match) { - SDValue Chain = LD->getChain(); - SDValue Base = LD->getBasePtr(); - SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), - CurDAG->getRegister(0, MVT::i32), Chain }; - return CurDAG->getTargetNode(Opcode, dl, MVT::i32, MVT::i32, - MVT::Other, Ops, 6); - } - } + SDNode *ResNode = 0; + if (Subtarget->isThumb2()) + ResNode = SelectT2IndexedLoad(Op); + else + ResNode = SelectARMIndexedLoad(Op); + if (ResNode) + return ResNode; // Other cases are autogenerated. break; } @@ -751,7 +984,12 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) // Pattern complexity = 6 cost = 1 size = 0 - unsigned Opc = Subtarget->isThumb() ? ARM::tBcc : ARM::Bcc; + // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) + // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) + // Pattern complexity = 6 cost = 1 size = 0 + + unsigned Opc = Subtarget->isThumb() ? + ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; SDValue Chain = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index c24bb2e..41c9ecc 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -231,16 +231,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); // ARM supports all 4 flavors of integer indexed load / store. - for (unsigned im = (unsigned)ISD::PRE_INC; - im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { - setIndexedLoadAction(im, MVT::i1, Legal); - setIndexedLoadAction(im, MVT::i8, Legal); - setIndexedLoadAction(im, MVT::i16, Legal); - setIndexedLoadAction(im, MVT::i32, Legal); - setIndexedStoreAction(im, MVT::i1, Legal); - setIndexedStoreAction(im, MVT::i8, Legal); - setIndexedStoreAction(im, MVT::i16, Legal); - setIndexedStoreAction(im, MVT::i32, Legal); + if (!Subtarget->isThumb1Only()) { + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, MVT::i1, Legal); + setIndexedLoadAction(im, MVT::i8, Legal); + setIndexedLoadAction(im, MVT::i16, Legal); + setIndexedLoadAction(im, MVT::i32, Legal); + setIndexedStoreAction(im, MVT::i1, Legal); + setIndexedStoreAction(im, MVT::i8, Legal); + setIndexedStoreAction(im, MVT::i16, Legal); + setIndexedStoreAction(im, MVT::i32, Legal); + } } // i64 operation support. @@ -301,7 +303,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); - if (!Subtarget->hasV6Ops()) { + if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); } @@ -402,7 +404,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; case ARMISD::CMP: return "ARMISD::CMP"; - case ARMISD::CMPNZ: return "ARMISD::CMPNZ"; + case ARMISD::CMPZ: return "ARMISD::CMPZ"; case ARMISD::CMPFP: return "ARMISD::CMPFP"; case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; @@ -455,6 +457,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { } } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { + return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2; +} + //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// @@ -1152,7 +1159,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, // FIXME: is there useful debug info available here? std::pair<SDValue, SDValue> CallResult = LowerCallTo(Chain, (const Type *) Type::Int32Ty, false, false, false, false, - CallingConv::C, false, + 0, CallingConv::C, false, DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); return CallResult.first; } @@ -1592,10 +1599,8 @@ static SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, break; case ARMCC::EQ: case ARMCC::NE: - case ARMCC::MI: - case ARMCC::PL: - // Uses only N and Z Flags - CompareType = ARMISD::CMPNZ; + // Uses only Z Flag + CompareType = ARMISD::CMPZ; break; } ARMCC = DAG.getConstant(CondCode, MVT::i32); @@ -2920,10 +2925,10 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, return true; } -static bool getIndexedAddressParts(SDNode *Ptr, MVT VT, - bool isSEXTLoad, SDValue &Base, - SDValue &Offset, bool &isInc, - SelectionDAG &DAG) { +static bool getARMIndexedAddressParts(SDNode *Ptr, MVT VT, + bool isSEXTLoad, SDValue &Base, + SDValue &Offset, bool &isInc, + SelectionDAG &DAG) { if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) return false; @@ -2933,6 +2938,7 @@ static bool getIndexedAddressParts(SDNode *Ptr, MVT VT, if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); if (RHSC < 0 && RHSC > -256) { + assert(Ptr->getOpcode() == ISD::ADD); isInc = false; Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); return true; @@ -2946,6 +2952,7 @@ static bool getIndexedAddressParts(SDNode *Ptr, MVT VT, if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); if (RHSC < 0 && RHSC > -0x1000) { + assert(Ptr->getOpcode() == ISD::ADD); isInc = false; Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); Base = Ptr->getOperand(0); @@ -2976,6 +2983,31 @@ static bool getIndexedAddressParts(SDNode *Ptr, MVT VT, return false; } +static bool getT2IndexedAddressParts(SDNode *Ptr, MVT VT, + bool isSEXTLoad, SDValue &Base, + SDValue &Offset, bool &isInc, + SelectionDAG &DAG) { + if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) + return false; + + Base = Ptr->getOperand(0); + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (RHSC < 0 && RHSC > -0x100) { // 8 bits. + assert(Ptr->getOpcode() == ISD::ADD); + isInc = false; + Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); + return true; + } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. + isInc = Ptr->getOpcode() == ISD::ADD; + Offset = DAG.getConstant(RHSC, RHS->getValueType(0)); + return true; + } + } + + return false; +} + /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. @@ -2984,7 +3016,7 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { - if (Subtarget->isThumb()) + if (Subtarget->isThumb1Only()) return false; MVT VT; @@ -3001,13 +3033,18 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, return false; bool isInc; - bool isLegal = getIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, Offset, - isInc, DAG); - if (isLegal) { - AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; - return true; - } - return false; + bool isLegal = false; + if (Subtarget->isThumb2()) + isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, + Offset, isInc, DAG); + else + isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, + Offset, isInc, DAG); + if (!isLegal) + return false; + + AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; + return true; } /// getPostIndexedAddressParts - returns true by value, base pointer and @@ -3018,7 +3055,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { - if (Subtarget->isThumb()) + if (Subtarget->isThumb1Only()) return false; MVT VT; @@ -3033,13 +3070,18 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, return false; bool isInc; - bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + bool isLegal = false; + if (Subtarget->isThumb2()) + isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, isInc, DAG); - if (isLegal) { - AM = isInc ? ISD::POST_INC : ISD::POST_DEC; - return true; - } - return false; + else + isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + isInc, DAG); + if (!isLegal) + return false; + + AM = isInc ? ISD::POST_INC : ISD::POST_DEC; + return true; } void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 631e37f..553a86d 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -45,7 +45,7 @@ namespace llvm { PIC_ADD, // Add with a PC operand and a PIC label. CMP, // ARM compare instructions. - CMPNZ, // ARM compare that uses only N or Z flags. + CMPZ, // ARM compare that sets only Z flag. CMPFP, // ARM VFP compare instruction, sets FPSCR. CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR. FMSTAT, // ARM fmstat instruction. @@ -197,6 +197,9 @@ namespace llvm { return Subtarget; } + /// getFunctionAlignment - Return the Log2 alignment of this function. + virtual unsigned getFunctionAlignment(const Function *F) const; + private: /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index d7371b0..301a6c1 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -59,6 +59,49 @@ def NEONDupFrm : Format<27>; class UnaryDP { bit isUnaryDataProc = 1; } //===----------------------------------------------------------------------===// +// ARM Instruction flags. These need to match ARMInstrInfo.h. +// + +// Addressing mode. +class AddrMode<bits<4> val> { + bits<4> Value = val; +} +def AddrModeNone : AddrMode<0>; +def AddrMode1 : AddrMode<1>; +def AddrMode2 : AddrMode<2>; +def AddrMode3 : AddrMode<3>; +def AddrMode4 : AddrMode<4>; +def AddrMode5 : AddrMode<5>; +def AddrMode6 : AddrMode<6>; +def AddrModeT1_1 : AddrMode<7>; +def AddrModeT1_2 : AddrMode<8>; +def AddrModeT1_4 : AddrMode<9>; +def AddrModeT1_s : AddrMode<10>; +def AddrModeT2_i12: AddrMode<12>; +def AddrModeT2_i8 : AddrMode<12>; +def AddrModeT2_so : AddrMode<13>; +def AddrModeT2_pc : AddrMode<14>; +def AddrModeT2_i8s4 : AddrMode<15>; + +// Instruction size. +class SizeFlagVal<bits<3> val> { + bits<3> Value = val; +} +def SizeInvalid : SizeFlagVal<0>; // Unset. +def SizeSpecial : SizeFlagVal<1>; // Pseudo or special. +def Size8Bytes : SizeFlagVal<2>; +def Size4Bytes : SizeFlagVal<3>; +def Size2Bytes : SizeFlagVal<4>; + +// Load / store index mode. +class IndexMode<bits<2> val> { + bits<2> Value = val; +} +def IndexModeNone : IndexMode<0>; +def IndexModePre : IndexMode<1>; +def IndexModePost : IndexMode<2>; + +//===----------------------------------------------------------------------===// // ARM Instruction templates. // @@ -706,7 +749,6 @@ class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> { // Thumb Instruction Format Definitions. // - // TI - Thumb instruction. class ThumbI<dag outs, dag ins, AddrMode am, SizeFlagVal sz, @@ -721,18 +763,6 @@ class ThumbI<dag outs, dag ins, AddrMode am, SizeFlagVal sz, class TI<dag outs, dag ins, string asm, list<dag> pattern> : ThumbI<outs, ins, AddrModeNone, Size2Bytes, asm, "", pattern>; -class TI1<dag outs, dag ins, string asm, list<dag> pattern> - : ThumbI<outs, ins, AddrModeT1, Size2Bytes, asm, "", pattern>; -class TI2<dag outs, dag ins, string asm, list<dag> pattern> - : ThumbI<outs, ins, AddrModeT2, Size2Bytes, asm, "", pattern>; -class TI4<dag outs, dag ins, string asm, list<dag> pattern> - : ThumbI<outs, ins, AddrModeT4, Size2Bytes, asm, "", pattern>; -class TIs<dag outs, dag ins, string asm, list<dag> pattern> - : ThumbI<outs, ins, AddrModeTs, Size2Bytes, asm, "", pattern>; - -// Two-address instructions -class TIt<dag outs, dag ins, string asm, list<dag> pattern> - : ThumbI<outs, ins, AddrModeNone, Size2Bytes, asm, "$lhs = $dst", pattern>; // BL, BLX(1) are translated by assembler into two instructions class TIx2<dag outs, dag ins, string asm, list<dag> pattern> @@ -764,6 +794,18 @@ class Thumb1I<dag outs, dag ins, AddrMode am, SizeFlagVal sz, class T1I<dag outs, dag ins, string asm, list<dag> pattern> : Thumb1I<outs, ins, AddrModeNone, Size2Bytes, asm, "", pattern>; +class T1I1<dag outs, dag ins, string asm, list<dag> pattern> + : Thumb1I<outs, ins, AddrModeT1_1, Size2Bytes, asm, "", pattern>; +class T1I2<dag outs, dag ins, string asm, list<dag> pattern> + : Thumb1I<outs, ins, AddrModeT1_2, Size2Bytes, asm, "", pattern>; +class T1I4<dag outs, dag ins, string asm, list<dag> pattern> + : Thumb1I<outs, ins, AddrModeT1_4, Size2Bytes, asm, "", pattern>; +class T1Is<dag outs, dag ins, string asm, list<dag> pattern> + : Thumb1I<outs, ins, AddrModeT1_s, Size2Bytes, asm, "", pattern>; +class T1Ix2<dag outs, dag ins, string asm, list<dag> pattern> + : Thumb1I<outs, ins, AddrModeNone, Size4Bytes, asm, "", pattern>; +class T1JTI<dag outs, dag ins, string asm, list<dag> pattern> + : Thumb1I<outs, ins, AddrModeNone, SizeSpecial, asm, "", pattern>; // Two-address instructions class T1It<dag outs, dag ins, string asm, list<dag> pattern> @@ -781,7 +823,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, let InOperandList = !con(iops, (ops pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; - list<Predicate> Predicates = [IsThumb, HasThumb2]; + list<Predicate> Predicates = [IsThumb2]; } // Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as @@ -796,7 +838,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); let Pattern = pattern; - list<Predicate> Predicates = [IsThumb, HasThumb2]; + list<Predicate> Predicates = [IsThumb2]; } // Special cases @@ -807,21 +849,45 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, let InOperandList = iops; let AsmString = asm; let Pattern = pattern; - list<Predicate> Predicates = [IsThumb, HasThumb2]; + list<Predicate> Predicates = [IsThumb2]; } class T2I<dag oops, dag iops, string opc, string asm, list<dag> pattern> : Thumb2I<oops, iops, AddrModeNone, Size4Bytes, opc, asm, "", pattern>; +class T2Ii12<dag oops, dag iops, string opc, string asm, list<dag> pattern> + : Thumb2I<oops, iops, AddrModeT2_i12, Size4Bytes, opc, asm, "", pattern>; +class T2Ii8<dag oops, dag iops, string opc, string asm, list<dag> pattern> + : Thumb2I<oops, iops, AddrModeT2_i8, Size4Bytes, opc, asm, "", pattern>; +class T2Iso<dag oops, dag iops, string opc, string asm, list<dag> pattern> + : Thumb2I<oops, iops, AddrModeT2_so, Size4Bytes, opc, asm, "", pattern>; +class T2Ipc<dag oops, dag iops, string opc, string asm, list<dag> pattern> + : Thumb2I<oops, iops, AddrModeT2_pc, Size4Bytes, opc, asm, "", pattern>; +class T2Ii8s4<dag oops, dag iops, string opc, string asm, list<dag> pattern> + : Thumb2I<oops, iops, AddrModeT2_i8s4, Size4Bytes, opc, asm, "", pattern>; class T2sI<dag oops, dag iops, string opc, string asm, list<dag> pattern> : Thumb2sI<oops, iops, AddrModeNone, Size4Bytes, opc, asm, "", pattern>; class T2XI<dag oops, dag iops, string asm, list<dag> pattern> : Thumb2XI<oops, iops, AddrModeNone, Size4Bytes, asm, "", pattern>; +class T2JTI<dag oops, dag iops, string asm, list<dag> pattern> + : Thumb2XI<oops, iops, AddrModeNone, SizeSpecial, asm, "", pattern>; + +// T2Iidxldst - Thumb2 indexed load / store instructions. +class T2Iidxldst<dag oops, dag iops, AddrMode am, IndexMode im, + string opc, string asm, string cstr, list<dag> pattern> + : InstARM<am, Size4Bytes, im, ThumbFrm, cstr> { + let OutOperandList = oops; + let InOperandList = !con(iops, (ops pred:$p)); + let AsmString = !strconcat(opc, !strconcat("${p}", asm)); + let Pattern = pattern; + list<Predicate> Predicates = [IsThumb2]; +} + // T2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode. class T2Pat<dag pattern, dag result> : Pat<pattern, result> { - list<Predicate> Predicates = [IsThumb, HasThumb2]; + list<Predicate> Predicates = [IsThumb2]; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index d95089d..443fdc7 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -40,108 +40,24 @@ const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) { } ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI) - : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)), - RI(*this, STI) { + : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)) { } ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI) { + : ARMBaseInstrInfo(STI), RI(*this, STI) { } -/// Return true if the instruction is a register to register move and -/// leave the source and dest operands in the passed parameters. -/// -bool ARMInstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned& SrcSubIdx, unsigned& DstSubIdx) const { - SrcSubIdx = DstSubIdx = 0; // No sub-registers. - - unsigned oc = MI.getOpcode(); - switch (oc) { - default: - return false; - case ARM::FCPYS: - case ARM::FCPYD: - case ARM::VMOVD: - case ARM::VMOVQ: - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - return true; - case ARM::MOVr: - assert(MI.getDesc().getNumOperands() >= 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "Invalid ARM MOV instruction"); - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - return true; - } -} - -unsigned ARMInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case ARM::LDR: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isReg() && - MI->getOperand(3).isImm() && - MI->getOperand(2).getReg() == 0 && - MI->getOperand(3).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - case ARM::FLDD: - case ARM::FLDS: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && - MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - return 0; -} - -unsigned ARMInstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case ARM::STR: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isReg() && - MI->getOperand(3).isImm() && - MI->getOperand(2).getReg() == 0 && - MI->getOperand(3).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - case ARM::FSTD: - case ARM::FSTS: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && - MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - - return 0; -} - -void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, - const MachineInstr *Orig) const { +void ARMInstrInfo::reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, + const MachineInstr *Orig) const { DebugLoc dl = Orig->getDebugLoc(); if (Orig->getOpcode() == ARM::MOVi2pieces) { - RI.emitLoadConstPool(MBB, I, DestReg, Orig->getOperand(1).getImm(), - Orig->getOperand(2).getImm(), - Orig->getOperand(3).getReg(), this, false, dl); + RI.emitLoadConstPool(MBB, I, this, dl, + DestReg, + Orig->getOperand(1).getImm(), + (ARMCC::CondCodes)Orig->getOperand(2).getImm(), + Orig->getOperand(3).getReg()); return; } @@ -334,10 +250,10 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // Branch analysis. bool - ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const { +ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) @@ -349,11 +265,11 @@ bool // If there is only one terminator instruction, process it. unsigned LastOpc = LastInst->getOpcode(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (LastOpc == ARM::B || LastOpc == ARM::tB) { + if (LastOpc == ARM::B || LastOpc == ARM::tB || LastOpc == ARM::t2B) { TBB = LastInst->getOperand(0).getMBB(); return false; } - if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) { + if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc || LastOpc == ARM::t2Bcc) { // Block ends with fall-through condbranch. TBB = LastInst->getOperand(0).getMBB(); Cond.push_back(LastInst->getOperand(1)); @@ -370,10 +286,12 @@ bool if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) return true; - // If the block ends with ARM::B/ARM::tB and a ARM::Bcc/ARM::tBcc, handle it. + // If the block ends with ARM::B/ARM::tB/ARM::t2B and a + // ARM::Bcc/ARM::tBcc/ARM::t2Bcc, handle it. unsigned SecondLastOpc = SecondLastInst->getOpcode(); if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) || - (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) { + (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB) || + (SecondLastOpc == ARM::t2Bcc && LastOpc == ARM::t2B)) { TBB = SecondLastInst->getOperand(0).getMBB(); Cond.push_back(SecondLastInst->getOperand(1)); Cond.push_back(SecondLastInst->getOperand(2)); @@ -383,8 +301,9 @@ bool // If the block ends with two unconditional branches, handle it. The second // one is not executed, so remove it. - if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB) && - (LastOpc == ARM::B || LastOpc == ARM::tB)) { + if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB || + SecondLastOpc==ARM::t2B) && + (LastOpc == ARM::B || LastOpc == ARM::tB || LastOpc == ARM::t2B)) { TBB = SecondLastInst->getOperand(0).getMBB(); I = LastInst; if (AllowModify) @@ -396,8 +315,10 @@ bool // branch. The branch folder can create these, and we must get rid of them for // correctness of Thumb constant islands. if ((SecondLastOpc == ARM::BR_JTr || SecondLastOpc==ARM::BR_JTm || - SecondLastOpc == ARM::BR_JTadd || SecondLastOpc==ARM::tBR_JTr) && - (LastOpc == ARM::B || LastOpc == ARM::tB)) { + SecondLastOpc == ARM::BR_JTadd || SecondLastOpc==ARM::tBR_JTr || + SecondLastOpc == ARM::t2BR_JTr || SecondLastOpc==ARM::t2BR_JTm || + SecondLastOpc == ARM::t2BR_JTadd) && + (LastOpc == ARM::B || LastOpc == ARM::tB || LastOpc == ARM::t2B)) { I = LastInst; if (AllowModify) I->eraseFromParent(); @@ -412,8 +333,10 @@ bool unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B; - int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc; + int BOpc = AFI->isThumbFunction() ? + (AFI->isThumb2Function() ? ARM::t2B : ARM::tB) : ARM::B; + int BccOpc = AFI->isThumbFunction() ? + (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return 0; @@ -444,8 +367,10 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc dl = DebugLoc::getUnknownLoc(); MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B; - int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc; + int BOpc = AFI->isThumbFunction() ? + (AFI->isThumb2Function() ? ARM::t2B : ARM::tB) : ARM::B; + int BccOpc = AFI->isThumbFunction() ? + (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); @@ -468,11 +393,288 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, return 2; } -bool ARMInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { +bool +ARMBaseInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { + if (MBB.empty()) return false; + + switch (MBB.back().getOpcode()) { + case ARM::BX_RET: // Return. + case ARM::LDM_RET: + case ARM::tBX_RET: + case ARM::tBX_RET_vararg: + case ARM::tPOP_RET: + case ARM::B: + case ARM::tB: + case ARM::t2B: // Uncond branch. + case ARM::tBR_JTr: + case ARM::t2BR_JTr: + case ARM::BR_JTr: // Jumptable branch. + case ARM::t2BR_JTm: + case ARM::BR_JTm: // Jumptable branch through mem. + case ARM::t2BR_JTadd: + case ARM::BR_JTadd: // Jumptable branch add to pc. + return true; + default: return false; + } +} + +bool ARMBaseInstrInfo:: +ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); + Cond[0].setImm(ARMCC::getOppositeCondition(CC)); + return false; +} + +bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const { + int PIdx = MI->findFirstPredOperandIdx(); + return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL; +} + +bool ARMBaseInstrInfo:: +PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const { + unsigned Opc = MI->getOpcode(); + if (Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B) { + MI->setDesc(get((Opc == ARM::B) ? ARM::Bcc : + ((Opc == ARM::tB) ? ARM::tBcc : ARM::t2Bcc))); + MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm())); + MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false)); + return true; + } + + int PIdx = MI->findFirstPredOperandIdx(); + if (PIdx != -1) { + MachineOperand &PMO = MI->getOperand(PIdx); + PMO.setImm(Pred[0].getImm()); + MI->getOperand(PIdx+1).setReg(Pred[1].getReg()); + return true; + } + return false; +} + +bool ARMBaseInstrInfo:: +SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const { + if (Pred1.size() > 2 || Pred2.size() > 2) + return false; + + ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); + ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); + if (CC1 == CC2) + return true; + + switch (CC1) { + default: + return false; + case ARMCC::AL: + return true; + case ARMCC::HS: + return CC2 == ARMCC::HI; + case ARMCC::LS: + return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; + case ARMCC::GE: + return CC2 == ARMCC::GT; + case ARMCC::LE: + return CC2 == ARMCC::LT; + } +} + +bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const { + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.getImplicitDefs() && !TID.hasOptionalDef()) + return false; + + bool Found = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == ARM::CPSR) { + Pred.push_back(MO); + Found = true; + } + } + + return Found; +} + + +/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing +static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, + unsigned JTI) DISABLE_INLINE; +static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, + unsigned JTI) { + return JT[JTI].MBBs.size(); +} + +/// GetInstSize - Return the size of the specified MachineInstr. +/// +unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { + const MachineBasicBlock &MBB = *MI->getParent(); + const MachineFunction *MF = MBB.getParent(); + const TargetAsmInfo *TAI = MF->getTarget().getTargetAsmInfo(); + + // Basic size info comes from the TSFlags field. + const TargetInstrDesc &TID = MI->getDesc(); + unsigned TSFlags = TID.TSFlags; + + switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) { + default: { + // If this machine instr is an inline asm, measure it. + if (MI->getOpcode() == ARM::INLINEASM) + return TAI->getInlineAsmLength(MI->getOperand(0).getSymbolName()); + if (MI->isLabel()) + return 0; + switch (MI->getOpcode()) { + default: + assert(0 && "Unknown or unset size field for instr!"); + break; + case TargetInstrInfo::IMPLICIT_DEF: + case TargetInstrInfo::DECLARE: + case TargetInstrInfo::DBG_LABEL: + case TargetInstrInfo::EH_LABEL: + return 0; + } + break; + } + case ARMII::Size8Bytes: return 8; // Arm instruction x 2. + case ARMII::Size4Bytes: return 4; // Arm instruction. + case ARMII::Size2Bytes: return 2; // Thumb instruction. + case ARMII::SizeSpecial: { + switch (MI->getOpcode()) { + case ARM::CONSTPOOL_ENTRY: + // If this machine instr is a constant pool entry, its size is recorded as + // operand #2. + return MI->getOperand(2).getImm(); + case ARM::Int_eh_sjlj_setjmp: return 12; + case ARM::BR_JTr: + case ARM::BR_JTm: + case ARM::BR_JTadd: + case ARM::t2BR_JTr: + case ARM::t2BR_JTm: + case ARM::t2BR_JTadd: + case ARM::tBR_JTr: { + // These are jumptable branches, i.e. a branch followed by an inlined + // jumptable. The size is 4 + 4 * number of entries. + unsigned NumOps = TID.getNumOperands(); + MachineOperand JTOP = + MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2)); + unsigned JTI = JTOP.getIndex(); + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + assert(JTI < JT.size()); + // Thumb instructions are 2 byte aligned, but JT entries are 4 byte + // 4 aligned. The assembler / linker may add 2 byte padding just before + // the JT entries. The size does not include this padding; the + // constant islands pass does separate bookkeeping for it. + // FIXME: If we know the size of the function is less than (1 << 16) *2 + // bytes, we can use 16-bit entries instead. Then there won't be an + // alignment issue. + return getNumJTEntries(JT, JTI) * 4 + + ((MI->getOpcode()==ARM::tBR_JTr) ? 2 : 4); + } + default: + // Otherwise, pseudo-instruction sizes are zero. + return 0; + } + } + } + return 0; // Not reached +} + +/// Return true if the instruction is a register to register move and +/// leave the source and dest operands in the passed parameters. +/// +bool +ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned& SrcSubIdx, unsigned& DstSubIdx) const { + SrcSubIdx = DstSubIdx = 0; // No sub-registers. + + unsigned oc = MI.getOpcode(); + switch (oc) { + default: + return false; + case ARM::FCPYS: + case ARM::FCPYD: + case ARM::VMOVD: + case ARM::VMOVQ: + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + return true; + case ARM::MOVr: + assert(MI.getDesc().getNumOperands() >= 2 && + MI.getOperand(0).isReg() && + MI.getOperand(1).isReg() && + "Invalid ARM MOV instruction"); + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + return true; + } +} + +unsigned +ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case ARM::LDR: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isReg() && + MI->getOperand(3).isImm() && + MI->getOperand(2).getReg() == 0 && + MI->getOperand(3).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::FLDD: + case ARM::FLDS: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +unsigned +ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case ARM::STR: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isReg() && + MI->getOperand(3).isImm() && + MI->getOperand(2).getReg() == 0 && + MI->getOperand(3).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::FSTD: + case ARM::FSTS: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + + return 0; +} + +bool +ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); @@ -498,7 +700,7 @@ bool ARMInstrInfo::copyRegToReg(MachineBasicBlock &MBB, return true; } -void ARMInstrInfo:: +void ARMBaseInstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC) const { @@ -521,11 +723,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } } -void ARMInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, - bool isKill, - SmallVectorImpl<MachineOperand> &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const{ +void +ARMBaseInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, + bool isKill, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const{ DebugLoc DL = DebugLoc::getUnknownLoc(); unsigned Opc = 0; if (RC == ARM::GPRRegisterClass) { @@ -546,7 +749,7 @@ void ARMInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, return; } -void ARMInstrInfo:: +void ARMBaseInstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC) const { @@ -566,7 +769,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } } -void ARMInstrInfo:: +void ARMBaseInstrInfo:: loadRegFromAddr(MachineFunction &MF, unsigned DestReg, SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC, @@ -590,7 +793,7 @@ loadRegFromAddr(MachineFunction &MF, unsigned DestReg, return; } -MachineInstr *ARMInstrInfo:: +MachineInstr *ARMBaseInstrInfo:: foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops, int FI) const { if (Ops.size() != 1) return NULL; @@ -609,14 +812,19 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, if (OpNum == 0) { // move -> store unsigned SrcReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR)) - .addReg(SrcReg, getKillRegState(isKill)) + .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); } else { // move -> load unsigned DstReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::LDR)) - .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addReg(DstReg, + RegState::Define | + getDeadRegState(isDead) | + getUndefRegState(isUndef)) .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); } break; @@ -626,14 +834,22 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned PredReg = MI->getOperand(3).getReg(); if (OpNum == 0) { // move -> store unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS)) - .addReg(SrcReg).addFrameIndex(FI) + .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) + .addFrameIndex(FI) .addImm(0).addImm(Pred).addReg(PredReg); } else { // move -> load unsigned DstReg = MI->getOperand(0).getReg(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS), DstReg) - .addFrameIndex(FI) - .addImm(0).addImm(Pred).addReg(PredReg); + bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS)) + .addReg(DstReg, + RegState::Define | + getDeadRegState(isDead) | + getUndefRegState(isUndef)) + .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } break; } @@ -643,14 +859,19 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, if (OpNum == 0) { // move -> store unsigned SrcReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD)) - .addReg(SrcReg, getKillRegState(isKill)) + .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } else { // move -> load unsigned DstReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD)) - .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addReg(DstReg, + RegState::Define | + getDeadRegState(isDead) | + getUndefRegState(isUndef)) .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } break; @@ -660,35 +881,25 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, return NewMI; } -bool ARMBaseInstrInfo:: -canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const { +MachineInstr* +ARMBaseInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const { + return 0; +} + +bool +ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const { if (Ops.size() != 1) return false; - unsigned OpNum = Ops[0]; unsigned Opc = MI->getOpcode(); switch (Opc) { default: break; case ARM::MOVr: // If it is updating CPSR, then it cannot be folded. return MI->getOperand(4).getReg() != ARM::CPSR; - case ARM::tMOVr: - case ARM::tMOVlor2hir: - case ARM::tMOVhir2lor: - case ARM::tMOVhir2hir: { - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg)) - // tSpill cannot take a high register operand. - return false; - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg)) - // tRestore cannot target a high register operand. - return false; - } - return true; - } case ARM::FCPYS: case ARM::FCPYD: return true; @@ -700,183 +911,3 @@ canFoldMemoryOperand(const MachineInstr *MI, return false; } - -bool - ARMBaseInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case ARM::BX_RET: // Return. - case ARM::LDM_RET: - case ARM::tBX_RET: - case ARM::tBX_RET_vararg: - case ARM::tPOP_RET: - case ARM::B: - case ARM::tB: // Uncond branch. - case ARM::tBR_JTr: - case ARM::BR_JTr: // Jumptable branch. - case ARM::BR_JTm: // Jumptable branch through mem. - case ARM::BR_JTadd: // Jumptable branch add to pc. - return true; - default: return false; - } -} - -bool ARMBaseInstrInfo:: -ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { - ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); - Cond[0].setImm(ARMCC::getOppositeCondition(CC)); - return false; -} - -bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const { - int PIdx = MI->findFirstPredOperandIdx(); - return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL; -} - -bool ARMBaseInstrInfo:: -PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl<MachineOperand> &Pred) const { - unsigned Opc = MI->getOpcode(); - if (Opc == ARM::B || Opc == ARM::tB) { - MI->setDesc(get(Opc == ARM::B ? ARM::Bcc : ARM::tBcc)); - MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm())); - MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false)); - return true; - } - - int PIdx = MI->findFirstPredOperandIdx(); - if (PIdx != -1) { - MachineOperand &PMO = MI->getOperand(PIdx); - PMO.setImm(Pred[0].getImm()); - MI->getOperand(PIdx+1).setReg(Pred[1].getReg()); - return true; - } - return false; -} - -bool ARMBaseInstrInfo:: -SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, - const SmallVectorImpl<MachineOperand> &Pred2) const { - if (Pred1.size() > 2 || Pred2.size() > 2) - return false; - - ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); - ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); - if (CC1 == CC2) - return true; - - switch (CC1) { - default: - return false; - case ARMCC::AL: - return true; - case ARMCC::HS: - return CC2 == ARMCC::HI; - case ARMCC::LS: - return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; - case ARMCC::GE: - return CC2 == ARMCC::GT; - case ARMCC::LE: - return CC2 == ARMCC::LT; - } -} - -bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, - std::vector<MachineOperand> &Pred) const { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.getImplicitDefs() && !TID.hasOptionalDef()) - return false; - - bool Found = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == ARM::CPSR) { - Pred.push_back(MO); - Found = true; - } - } - - return Found; -} - - -/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing -static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, - unsigned JTI) DISABLE_INLINE; -static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, - unsigned JTI) { - return JT[JTI].MBBs.size(); -} - -/// GetInstSize - Return the size of the specified MachineInstr. -/// -unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { - const MachineBasicBlock &MBB = *MI->getParent(); - const MachineFunction *MF = MBB.getParent(); - const TargetAsmInfo *TAI = MF->getTarget().getTargetAsmInfo(); - - // Basic size info comes from the TSFlags field. - const TargetInstrDesc &TID = MI->getDesc(); - unsigned TSFlags = TID.TSFlags; - - switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) { - default: { - // If this machine instr is an inline asm, measure it. - if (MI->getOpcode() == ARM::INLINEASM) - return TAI->getInlineAsmLength(MI->getOperand(0).getSymbolName()); - if (MI->isLabel()) - return 0; - switch (MI->getOpcode()) { - default: - assert(0 && "Unknown or unset size field for instr!"); - break; - case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::DECLARE: - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: - return 0; - } - break; - } - case ARMII::Size8Bytes: return 8; // Arm instruction x 2. - case ARMII::Size4Bytes: return 4; // Arm instruction. - case ARMII::Size2Bytes: return 2; // Thumb instruction. - case ARMII::SizeSpecial: { - switch (MI->getOpcode()) { - case ARM::CONSTPOOL_ENTRY: - // If this machine instr is a constant pool entry, its size is recorded as - // operand #2. - return MI->getOperand(2).getImm(); - case ARM::Int_eh_sjlj_setjmp: return 12; - case ARM::BR_JTr: - case ARM::BR_JTm: - case ARM::BR_JTadd: - case ARM::tBR_JTr: { - // These are jumptable branches, i.e. a branch followed by an inlined - // jumptable. The size is 4 + 4 * number of entries. - unsigned NumOps = TID.getNumOperands(); - MachineOperand JTOP = - MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2)); - unsigned JTI = JTOP.getIndex(); - const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); - assert(JTI < JT.size()); - // Thumb instructions are 2 byte aligned, but JT entries are 4 byte - // 4 aligned. The assembler / linker may add 2 byte padding just before - // the JT entries. The size does not include this padding; the - // constant islands pass does separate bookkeeping for it. - // FIXME: If we know the size of the function is less than (1 << 16) *2 - // bytes, we can use 16-bit entries instead. Then there won't be an - // alignment issue. - return getNumJTEntries(JT, JTI) * 4 + - (MI->getOpcode()==ARM::tBR_JTr ? 2 : 4); - } - default: - // Otherwise, pseudo-instruction sizes are zero. - return 0; - } - } - } - return 0; // Not reached -} diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index 131960b..8c8f788 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -33,16 +33,22 @@ namespace ARMII { // This four-bit field describes the addressing mode used. AddrModeMask = 0xf, - AddrModeNone = 0, - AddrMode1 = 1, - AddrMode2 = 2, - AddrMode3 = 3, - AddrMode4 = 4, - AddrMode5 = 5, - AddrModeT1 = 6, - AddrModeT2 = 7, - AddrModeT4 = 8, - AddrModeTs = 9, // i8 * 4 for pc and sp relative data + AddrModeNone = 0, + AddrMode1 = 1, + AddrMode2 = 2, + AddrMode3 = 3, + AddrMode4 = 4, + AddrMode5 = 5, + AddrMode6 = 6, + AddrModeT1_1 = 7, + AddrModeT1_2 = 8, + AddrModeT1_4 = 9, + AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data + AddrModeT2_i12 = 11, + AddrModeT2_i8 = 12, + AddrModeT2_so = 13, + AddrModeT2_pc = 14, // +/- i12 for pc relative data + AddrModeT2_i8s4 = 15, // i8 * 4 // Size* - Flags to keep track of the size of an instruction. SizeShift = 4, @@ -147,25 +153,16 @@ namespace ARMII { } class ARMBaseInstrInfo : public TargetInstrInfoImpl { - const ARMRegisterInfo RI; protected: // Can be only subclassed. explicit ARMBaseInstrInfo(const ARMSubtarget &STI); public: - - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As - /// such, whenever a client has an instance of instruction info, it should - /// always be able to get register info as well (through this method). - /// - virtual const ARMRegisterInfo &getRegisterInfo() const { return RI; } - - void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - unsigned DestReg, const MachineInstr *Orig) const; - virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const; + virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0; + // Branch analysis. virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, @@ -176,9 +173,6 @@ public: MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond) const; - virtual bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const; - virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; @@ -206,11 +200,6 @@ public: /// GetInstSize - Returns the size of the specified MachineInstr. /// virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const; -}; - -class ARMInstrInfo : public ARMBaseInstrInfo { -public: - explicit ARMInstrInfo(const ARMSubtarget &STI); /// Return true if the instruction is a register to register move and return /// the source and dest operands and their sub-register indices by reference. @@ -248,17 +237,33 @@ public: const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const; + virtual bool canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const; + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, + const SmallVectorImpl<unsigned> &Ops, int FrameIndex) const; virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { - return 0; - } + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const; +}; + +class ARMInstrInfo : public ARMBaseInstrInfo { + ARMRegisterInfo RI; +public: + explicit ARMInstrInfo(const ARMSubtarget &STI); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + const ARMRegisterInfo &getRegisterInfo() const { return RI; } + + void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + unsigned DestReg, const MachineInstr *Orig) const; }; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index cb7b7b9..408f47a 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -75,8 +75,8 @@ def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT, def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp, [SDNPOutFlag]>; -def ARMcmpNZ : SDNode<"ARMISD::CMPNZ", SDT_ARMCmp, - [SDNPOutFlag]>; +def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp, + [SDNPOutFlag,SDNPCommutative]>; def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>; @@ -99,7 +99,7 @@ def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; def HasNEON : Predicate<"Subtarget->hasNEON()">; def IsThumb : Predicate<"Subtarget->isThumb()">; def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">; -def HasThumb2 : Predicate<"Subtarget->hasThumb2()">; +def IsThumb2 : Predicate<"Subtarget->isThumb2()">; def IsARM : Predicate<"!Subtarget->isThumb()">; def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; @@ -287,6 +287,14 @@ def addrmode5 : Operand<i32>, let MIOperandInfo = (ops GPR, i32imm); } +// addrmode6 := reg with optional writeback +// +def addrmode6 : Operand<i32>, + ComplexPattern<i32, 3, "SelectAddrMode6", []> { + let PrintMethod = "printAddrMode6Operand"; + let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm); +} + // addrmodepc := pc + reg // def addrmodepc : Operand<i32>, @@ -309,43 +317,6 @@ def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> { } //===----------------------------------------------------------------------===// -// ARM Instruction flags. These need to match ARMInstrInfo.h. -// - -// Addressing mode. -class AddrMode<bits<4> val> { - bits<4> Value = val; -} -def AddrModeNone : AddrMode<0>; -def AddrMode1 : AddrMode<1>; -def AddrMode2 : AddrMode<2>; -def AddrMode3 : AddrMode<3>; -def AddrMode4 : AddrMode<4>; -def AddrMode5 : AddrMode<5>; -def AddrModeT1 : AddrMode<6>; -def AddrModeT2 : AddrMode<7>; -def AddrModeT4 : AddrMode<8>; -def AddrModeTs : AddrMode<9>; - -// Instruction size. -class SizeFlagVal<bits<3> val> { - bits<3> Value = val; -} -def SizeInvalid : SizeFlagVal<0>; // Unset. -def SizeSpecial : SizeFlagVal<1>; // Pseudo or special. -def Size8Bytes : SizeFlagVal<2>; -def Size4Bytes : SizeFlagVal<3>; -def Size2Bytes : SizeFlagVal<4>; - -// Load / store index mode. -class IndexMode<bits<2> val> { - bits<2> Value = val; -} -def IndexModeNone : IndexMode<0>; -def IndexModePre : IndexMode<1>; -def IndexModePost : IndexMode<2>; - -//===----------------------------------------------------------------------===// include "ARMInstrFormats.td" @@ -780,7 +751,7 @@ def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb), def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am3offset:$offset), LdMiscFrm, - "ldr", "sb $dst, [$base], $offset", "$base = $base_wb", []>; + "ldr", "sb $dst, [$base], $offset", "$base = $base_wb", []>; } // Store @@ -1309,19 +1280,19 @@ defm CMN : AI1_cmp_irs<0b1011, "cmn", // Note that TST/TEQ don't set all the same flags that CMP does! defm TST : AI1_cmp_irs<0b1000, "tst", - BinOpFrag<(ARMcmpNZ (and node:$LHS, node:$RHS), 0)>, 1>; + BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>, 1>; defm TEQ : AI1_cmp_irs<0b1001, "teq", - BinOpFrag<(ARMcmpNZ (xor node:$LHS, node:$RHS), 0)>, 1>; + BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>, 1>; -defm CMPnz : AI1_cmp_irs<0b1010, "cmp", - BinOpFrag<(ARMcmpNZ node:$LHS, node:$RHS)>>; -defm CMNnz : AI1_cmp_irs<0b1011, "cmn", - BinOpFrag<(ARMcmpNZ node:$LHS,(ineg node:$RHS))>>; +defm CMPz : AI1_cmp_irs<0b1010, "cmp", + BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>; +defm CMNz : AI1_cmp_irs<0b1011, "cmn", + BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>; def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm), (CMNri GPR:$src, so_imm_neg:$imm)>; -def : ARMPat<(ARMcmpNZ GPR:$src, so_imm_neg:$imm), +def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), (CMNri GPR:$src, so_imm_neg:$imm)>; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 7927ca5..904d9b1 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -187,23 +187,24 @@ let isCall = 1, let isBranch = 1, isTerminator = 1 in { let isBarrier = 1 in { let isPredicable = 1 in - def tB : TI<(outs), (ins brtarget:$target), "b $target", - [(br bb:$target)]>; + def tB : T1I<(outs), (ins brtarget:$target), "b $target", + [(br bb:$target)]>; // Far jump - def tBfar : TIx2<(outs), (ins brtarget:$target), "bl $target\t@ far jump",[]>; + def tBfar : T1Ix2<(outs), (ins brtarget:$target), + "bl $target\t@ far jump",[]>; - def tBR_JTr : TJTI<(outs), - (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id), - "cpy pc, $target \n\t.align\t2\n$jt", - [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>; + def tBR_JTr : T1JTI<(outs), + (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id), + "cpy pc, $target \n\t.align\t2\n$jt", + [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>; } } // FIXME: should be able to write a pattern for ARMBrcond, but can't use // a two-value operand where a dag node expects two operands. :( let isBranch = 1, isTerminator = 1 in - def tBcc : TI<(outs), (ins brtarget:$target, pred:$cc), "b$cc $target", + def tBcc : T1I<(outs), (ins brtarget:$target, pred:$cc), "b$cc $target", [/*(ARMbrcond bb:$target, imm:$cc)*/]>; //===----------------------------------------------------------------------===// @@ -211,68 +212,68 @@ let isBranch = 1, isTerminator = 1 in // let canFoldAsLoad = 1 in -def tLDR : TI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), +def tLDR : T1I4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), "ldr $dst, $addr", [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>; -def tLDRB : TI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), +def tLDRB : T1I1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), "ldrb $dst, $addr", [(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>; -def tLDRH : TI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), +def tLDRH : T1I2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), "ldrh $dst, $addr", [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>; -def tLDRSB : TI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), +def tLDRSB : T1I1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), "ldrsb $dst, $addr", [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>; -def tLDRSH : TI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), +def tLDRSH : T1I2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), "ldrsh $dst, $addr", [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>; let canFoldAsLoad = 1 in -def tLDRspi : TIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), +def tLDRspi : T1Is<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), "ldr $dst, $addr", [(set tGPR:$dst, (load t_addrmode_sp:$addr))]>; // Special instruction for restore. It cannot clobber condition register // when it's expanded by eliminateCallFramePseudoInstr(). let canFoldAsLoad = 1, mayLoad = 1 in -def tRestore : TIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), +def tRestore : T1Is<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), "ldr $dst, $addr", []>; // Load tconstpool let canFoldAsLoad = 1 in -def tLDRpci : TIs<(outs tGPR:$dst), (ins i32imm:$addr), +def tLDRpci : T1Is<(outs tGPR:$dst), (ins i32imm:$addr), "ldr $dst, $addr", [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>; // Special LDR for loads from non-pc-relative constpools. let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in -def tLDRcp : TIs<(outs tGPR:$dst), (ins i32imm:$addr), +def tLDRcp : T1Is<(outs tGPR:$dst), (ins i32imm:$addr), "ldr $dst, $addr", []>; -def tSTR : TI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), +def tSTR : T1I4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), "str $src, $addr", [(store tGPR:$src, t_addrmode_s4:$addr)]>; -def tSTRB : TI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), +def tSTRB : T1I1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), "strb $src, $addr", [(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>; -def tSTRH : TI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), +def tSTRH : T1I2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), "strh $src, $addr", [(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>; -def tSTRspi : TIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), +def tSTRspi : T1Is<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), "str $src, $addr", [(store tGPR:$src, t_addrmode_sp:$addr)]>; let mayStore = 1 in { // Special instruction for spill. It cannot clobber condition register // when it's expanded by eliminateCallFramePseudoInstr(). -def tSpill : TIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), +def tSpill : T1Is<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), "str $src, $addr", []>; } @@ -362,9 +363,9 @@ let Defs = [CPSR] in { def tCMN : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs), "cmn $lhs, $rhs", [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>; -def tCMNNZ : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs), - "cmn $lhs, $rhs", - [(ARMcmpNZ tGPR:$lhs, (ineg tGPR:$rhs))]>; +def tCMNZ : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs), + "cmn $lhs, $rhs", + [(ARMcmpZ tGPR:$lhs, (ineg tGPR:$rhs))]>; } // CMP immediate @@ -372,9 +373,9 @@ let Defs = [CPSR] in { def tCMPi8 : T1I<(outs), (ins tGPR:$lhs, i32imm:$rhs), "cmp $lhs, $rhs", [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>; -def tCMPNZi8 : T1I<(outs), (ins tGPR:$lhs, i32imm:$rhs), - "cmp $lhs, $rhs", - [(ARMcmpNZ tGPR:$lhs, imm0_255:$rhs)]>; +def tCMPZi8 : T1I<(outs), (ins tGPR:$lhs, i32imm:$rhs), + "cmp $lhs, $rhs", + [(ARMcmpZ tGPR:$lhs, imm0_255:$rhs)]>; } @@ -383,9 +384,9 @@ let Defs = [CPSR] in { def tCMPr : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs), "cmp $lhs, $rhs", [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>; -def tCMPNZr : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs), - "cmp $lhs, $rhs", - [(ARMcmpNZ tGPR:$lhs, tGPR:$rhs)]>; +def tCMPZr : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs), + "cmp $lhs, $rhs", + [(ARMcmpZ tGPR:$lhs, tGPR:$rhs)]>; } // TODO: A7-37: CMP(3) - cmp hi regs @@ -551,7 +552,7 @@ def tSXTH : T1I<(outs tGPR:$dst), (ins tGPR:$src), let isCommutable = 1, Defs = [CPSR] in def tTST : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs), "tst $lhs, $rhs", - [(ARMcmpNZ (and tGPR:$lhs, tGPR:$rhs), 0)]>; + [(ARMcmpZ (and tGPR:$lhs, tGPR:$rhs), 0)]>; // zero-extend byte def tUXTB : T1I<(outs tGPR:$dst), (ins tGPR:$src), @@ -622,13 +623,13 @@ def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>; def : Tv5Pat<(ARMcall tGPR:$dst), (tBLXr tGPR:$dst)>; // zextload i1 -> zextload i8 -def : TPat<(zextloadi1 t_addrmode_s1:$addr), - (tLDRB t_addrmode_s1:$addr)>; +def : T1Pat<(zextloadi1 t_addrmode_s1:$addr), + (tLDRB t_addrmode_s1:$addr)>; // extload -> zextload -def : TPat<(extloadi1 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; -def : TPat<(extloadi8 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; -def : TPat<(extloadi16 t_addrmode_s2:$addr), (tLDRH t_addrmode_s2:$addr)>; +def : T1Pat<(extloadi1 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; +def : T1Pat<(extloadi8 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; +def : T1Pat<(extloadi16 t_addrmode_s2:$addr), (tLDRH t_addrmode_s2:$addr)>; // Large immediate handling. diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index bfdf719..50345a6 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -89,7 +89,6 @@ def imm0_65535 : PatLeaf<(i32 imm), [{ return (uint32_t)N->getZExtValue() < 65536; }]>; - /// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield /// e.g., 0xf000ffff def bf_inv_mask_imm : Operand<i32>, @@ -127,6 +126,42 @@ def t2_lo16AllZero : PatLeaf<(i32 imm), [{ }], t2_hi16>; +// Define Thumb2 specific addressing modes. + +// t2addrmode_imm12 := reg + imm12 +def t2addrmode_imm12 : Operand<i32>, + ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> { + let PrintMethod = "printT2AddrModeImm12Operand"; + let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); +} + +// t2addrmode_imm8 := reg - imm8 +def t2addrmode_imm8 : Operand<i32>, + ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { + let PrintMethod = "printT2AddrModeImm8Operand"; + let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); +} + +def t2am_imm8_offset : Operand<i32>, + ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset", []>{ + let PrintMethod = "printT2AddrModeImm8OffsetOperand"; +} + +// t2addrmode_imm8s4 := reg + (imm8 << 2) +def t2addrmode_imm8s4 : Operand<i32>, + ComplexPattern<i32, 2, "SelectT2AddrModeImm8s4", []> { + let PrintMethod = "printT2AddrModeImm8Operand"; + let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); +} + +// t2addrmode_so_reg := reg + reg << imm2 +def t2addrmode_so_reg : Operand<i32>, + ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> { + let PrintMethod = "printT2AddrModeSoRegOperand"; + let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); +} + + //===----------------------------------------------------------------------===// // Multiclass helpers... // @@ -239,32 +274,32 @@ multiclass T2I_adde_sube_irs<string opc, PatFrag opnode, bit Commutable = 0> { def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), opc, " $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>; + Requires<[IsThumb2, CarryDefIsUnused]>; // register def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), opc, " $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUnused]> { + Requires<[IsThumb2, CarryDefIsUnused]> { let isCommutable = Commutable; } // shifted register def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), opc, " $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>; + Requires<[IsThumb2, CarryDefIsUnused]>; // Carry setting variants // shifted imm def Sri : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), !strconcat(opc, "s $dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> { + Requires<[IsThumb2, CarryDefIsUsed]> { let Defs = [CPSR]; } // register def Srr : T2XI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), !strconcat(opc, "s $dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> { + Requires<[IsThumb2, CarryDefIsUsed]> { let Defs = [CPSR]; let isCommutable = Commutable; } @@ -272,7 +307,7 @@ multiclass T2I_adde_sube_irs<string opc, PatFrag opnode, bit Commutable = 0> { def Srs : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), !strconcat(opc, "s $dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> { + Requires<[IsThumb2, CarryDefIsUsed]> { let Defs = [CPSR]; } } @@ -287,24 +322,24 @@ multiclass T2I_rsc_is<string opc, PatFrag opnode> { def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), opc, " $dst, $rhs, $lhs", [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>; + Requires<[IsThumb2, CarryDefIsUnused]>; // shifted register def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), opc, " $dst, $rhs, $lhs", [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>; + Requires<[IsThumb2, CarryDefIsUnused]>; // shifted imm def Sri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), !strconcat(opc, "s $dst, $rhs, $lhs"), [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> { + Requires<[IsThumb2, CarryDefIsUsed]> { let Defs = [CPSR]; } // shifted register def Srs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), !strconcat(opc, "s $dst, $rhs, $lhs"), [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>, - Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> { + Requires<[IsThumb2, CarryDefIsUsed]> { let Defs = [CPSR]; } } @@ -359,6 +394,71 @@ multiclass T2I_cmp_is<string opc, PatFrag opnode> { } } +/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns. +multiclass T2I_ld<string opc, PatFrag opnode> { + def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr), + opc, " $dst, $addr", + [(set GPR:$dst, (opnode t2addrmode_imm12:$addr))]>; + def i8 : T2Ii8 <(outs GPR:$dst), (ins t2addrmode_imm8:$addr), + opc, " $dst, $addr", + [(set GPR:$dst, (opnode t2addrmode_imm8:$addr))]>; + def s : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), + opc, " $dst, $addr", + [(set GPR:$dst, (opnode t2addrmode_so_reg:$addr))]>; + def pci : T2Ipc <(outs GPR:$dst), (ins i32imm:$addr), + opc, " $dst, $addr", + [(set GPR:$dst, (opnode (ARMWrapper tconstpool:$addr)))]>; +} + +/// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns. +multiclass T2I_st<string opc, PatFrag opnode> { + def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr), + opc, " $src, $addr", + [(opnode GPR:$src, t2addrmode_imm12:$addr)]>; + def i8 : T2Ii8 <(outs), (ins GPR:$src, t2addrmode_imm8:$addr), + opc, " $src, $addr", + [(opnode GPR:$src, t2addrmode_imm8:$addr)]>; + def s : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), + opc, " $src, $addr", + [(opnode GPR:$src, t2addrmode_so_reg:$addr)]>; +} + +/// T2I_picld - Defines the PIC load pattern. +class T2I_picld<string opc, PatFrag opnode> : + T2I<(outs GPR:$dst), (ins addrmodepc:$addr), + !strconcat("${addr:label}:\n\t", opc), " $dst, $addr", + [(set GPR:$dst, (opnode addrmodepc:$addr))]>; + +/// T2I_picst - Defines the PIC store pattern. +class T2I_picst<string opc, PatFrag opnode> : + T2I<(outs), (ins GPR:$src, addrmodepc:$addr), + !strconcat("${addr:label}:\n\t", opc), " $src, $addr", + [(opnode GPR:$src, addrmodepc:$addr)]>; + + +/// T2I_unary_rrot - A unary operation with two forms: one whose operand is a +/// register and one whose operand is a register rotated by 8/16/24. +multiclass T2I_unary_rrot<string opc, PatFrag opnode> { + def r : T2I<(outs GPR:$dst), (ins GPR:$Src), + opc, " $dst, $Src", + [(set GPR:$dst, (opnode GPR:$Src))]>; + def r_rot : T2I<(outs GPR:$dst), (ins GPR:$Src, i32imm:$rot), + opc, " $dst, $Src, ror $rot", + [(set GPR:$dst, (opnode (rotr GPR:$Src, rot_imm:$rot)))]>; +} + +/// T2I_bin_rrot - A binary operation with two forms: one whose operand is a +/// register and one whose operand is a register rotated by 8/16/24. +multiclass T2I_bin_rrot<string opc, PatFrag opnode> { + def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), + opc, " $dst, $LHS, $RHS", + [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>; + def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), + opc, " $dst, $LHS, $RHS, ror $rot", + [(set GPR:$dst, (opnode GPR:$LHS, + (rotr GPR:$RHS, rot_imm:$rot)))]>; +} + //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -409,6 +509,209 @@ def t2ADDrSPs : T2XI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), // Load / store Instructions. // +// Load +let canFoldAsLoad = 1 in +defm t2LDR : T2I_ld<"ldr", UnOpFrag<(load node:$Src)>>; + +// Loads with zero extension +defm t2LDRH : T2I_ld<"ldrh", UnOpFrag<(zextloadi16 node:$Src)>>; +defm t2LDRB : T2I_ld<"ldrb", UnOpFrag<(zextloadi8 node:$Src)>>; + +// Loads with sign extension +defm t2LDRSH : T2I_ld<"ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>; +defm t2LDRSB : T2I_ld<"ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>; + +let mayLoad = 1 in { +// Load doubleword +def t2LDRDi8 : T2Ii8s4<(outs GPR:$dst), (ins t2addrmode_imm8s4:$addr), + "ldrd", " $dst, $addr", []>; +def t2LDRDpci : T2Ii8s4<(outs GPR:$dst), (ins i32imm:$addr), + "ldrd", " $dst, $addr", []>; +} + +// zextload i1 -> zextload i8 +def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr), + (t2LDRBi12 t2addrmode_imm12:$addr)>; +def : T2Pat<(zextloadi1 t2addrmode_imm8:$addr), + (t2LDRBi8 t2addrmode_imm8:$addr)>; +def : T2Pat<(zextloadi1 t2addrmode_so_reg:$addr), + (t2LDRBs t2addrmode_so_reg:$addr)>; +def : T2Pat<(zextloadi1 (ARMWrapper tconstpool:$addr)), + (t2LDRBpci tconstpool:$addr)>; + +// extload -> zextload +// FIXME: Reduce the number of patterns by legalizing extload to zextload +// earlier? +def : T2Pat<(extloadi1 t2addrmode_imm12:$addr), + (t2LDRBi12 t2addrmode_imm12:$addr)>; +def : T2Pat<(extloadi1 t2addrmode_imm8:$addr), + (t2LDRBi8 t2addrmode_imm8:$addr)>; +def : T2Pat<(extloadi1 t2addrmode_so_reg:$addr), + (t2LDRBs t2addrmode_so_reg:$addr)>; +def : T2Pat<(extloadi1 (ARMWrapper tconstpool:$addr)), + (t2LDRBpci tconstpool:$addr)>; + +def : T2Pat<(extloadi8 t2addrmode_imm12:$addr), + (t2LDRBi12 t2addrmode_imm12:$addr)>; +def : T2Pat<(extloadi8 t2addrmode_imm8:$addr), + (t2LDRBi8 t2addrmode_imm8:$addr)>; +def : T2Pat<(extloadi8 t2addrmode_so_reg:$addr), + (t2LDRBs t2addrmode_so_reg:$addr)>; +def : T2Pat<(extloadi8 (ARMWrapper tconstpool:$addr)), + (t2LDRBpci tconstpool:$addr)>; + +def : T2Pat<(extloadi16 t2addrmode_imm12:$addr), + (t2LDRHi12 t2addrmode_imm12:$addr)>; +def : T2Pat<(extloadi16 t2addrmode_imm8:$addr), + (t2LDRHi8 t2addrmode_imm8:$addr)>; +def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr), + (t2LDRHs t2addrmode_so_reg:$addr)>; +def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), + (t2LDRHpci tconstpool:$addr)>; + +// Indexed loads +let mayLoad = 1 in { +def t2LDR_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins t2addrmode_imm8:$addr), + AddrModeT2_i8, IndexModePre, + "ldr", " $dst, $addr!", "$addr.base = $base_wb", + []>; + +def t2LDR_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, + "ldr", " $dst, [$base], $offset", "$base = $base_wb", + []>; + +def t2LDRB_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins t2addrmode_imm8:$addr), + AddrModeT2_i8, IndexModePre, + "ldrb", " $dst, $addr!", "$addr.base = $base_wb", + []>; +def t2LDRB_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, + "ldrb", " $dst, [$base], $offset", "$base = $base_wb", + []>; + +def t2LDRH_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins t2addrmode_imm8:$addr), + AddrModeT2_i8, IndexModePre, + "ldrh", " $dst, $addr!", "$addr.base = $base_wb", + []>; +def t2LDRH_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, + "ldrh", " $dst, [$base], $offset", "$base = $base_wb", + []>; + +def t2LDRSB_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins t2addrmode_imm8:$addr), + AddrModeT2_i8, IndexModePre, + "ldrsb", " $dst, $addr!", "$addr.base = $base_wb", + []>; +def t2LDRSB_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, + "ldrsb", " $dst, [$base], $offset", "$base = $base_wb", + []>; + +def t2LDRSH_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins t2addrmode_imm8:$addr), + AddrModeT2_i8, IndexModePre, + "ldrsh", " $dst, $addr!", "$addr.base = $base_wb", + []>; +def t2LDRSH_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, + "ldrsh", " $dst, [$base], $offset", "$base = $base_wb", + []>; +} + +// Store +defm t2STR : T2I_st<"str", BinOpFrag<(store node:$LHS, node:$RHS)>>; +defm t2STRB : T2I_st<"strb", BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; +defm t2STRH : T2I_st<"strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; + +// Store doubleword +let mayLoad = 1 in +def t2STRDi8 : T2Ii8s4<(outs), (ins GPR:$src, t2addrmode_imm8s4:$addr), + "strd", " $src, $addr", []>; + +// Indexed stores +def t2STR_PRE : T2Iidxldst<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePre, + "str", " $src, [$base, $offset]!", "$base = $base_wb", + [(set GPR:$base_wb, + (pre_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + +def t2STR_POST : T2Iidxldst<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, + "str", " $src, [$base], $offset", "$base = $base_wb", + [(set GPR:$base_wb, + (post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + +def t2STRH_PRE : T2Iidxldst<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePre, + "strh", " $src, [$base, $offset]!", "$base = $base_wb", + [(set GPR:$base_wb, + (pre_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + +def t2STRH_POST : T2Iidxldst<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, + "strh", " $src, [$base], $offset", "$base = $base_wb", + [(set GPR:$base_wb, + (post_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + +def t2STRB_PRE : T2Iidxldst<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePre, + "strb", " $src, [$base, $offset]!", "$base = $base_wb", + [(set GPR:$base_wb, + (pre_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + +def t2STRB_POST : T2Iidxldst<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), + AddrModeT2_i8, IndexModePost, + "strb", " $src, [$base], $offset", "$base = $base_wb", + [(set GPR:$base_wb, + (post_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + + +// Address computation and loads and stores in PIC mode. +let isNotDuplicable = 1, AddedComplexity = 10 in { +let canFoldAsLoad = 1 in +def t2PICLDR : T2I_picld<"ldr", UnOpFrag<(load node:$Src)>>; + +def t2PICLDRH : T2I_picld<"ldrh", UnOpFrag<(zextloadi16 node:$Src)>>; +def t2PICLDRB : T2I_picld<"ldrb", UnOpFrag<(zextloadi8 node:$Src)>>; +def t2PICLDRSH : T2I_picld<"ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>; +def t2PICLDRSB : T2I_picld<"ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>; + +def t2PICSTR : T2I_picst<"str", BinOpFrag<(store node:$LHS, node:$RHS)>>; +def t2PICSTRH : T2I_picst<"strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; +def t2PICSTRB : T2I_picst<"strb", BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; +} // isNotDuplicable = 1, AddedComplexity = 10 + + +//===----------------------------------------------------------------------===// +// Load / store multiple Instructions. +// + +let mayLoad = 1 in +def t2LDM : T2XI<(outs), + (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops), + "ldm${p}${addr:submode} $addr, $dst1", []>; + +let mayStore = 1 in +def t2STM : T2XI<(outs), + (ins addrmode4:$addr, pred:$p, reglist:$src1, variable_ops), + "stm${p}${addr:submode} $addr, $src1", []>; + //===----------------------------------------------------------------------===// // Move Instructions. // @@ -435,6 +738,40 @@ def t2MOVTi16 : T2sI<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), (or (and GPR:$src, 0xffff), t2_lo16AllZero:$imm))]>; //===----------------------------------------------------------------------===// +// Extend Instructions. +// + +// Sign extenders + +defm t2SXTB : T2I_unary_rrot<"sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; +defm t2SXTH : T2I_unary_rrot<"sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; + +defm t2SXTAB : T2I_bin_rrot<"sxtab", + BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; +defm t2SXTAH : T2I_bin_rrot<"sxtah", + BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; + +// TODO: SXT(A){B|H}16 + +// Zero extenders + +let AddedComplexity = 16 in { +defm t2UXTB : T2I_unary_rrot<"uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>; +defm t2UXTH : T2I_unary_rrot<"uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>; +defm t2UXTB16 : T2I_unary_rrot<"uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; + +def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), + (t2UXTB16r_rot GPR:$Src, 24)>; +def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), + (t2UXTB16r_rot GPR:$Src, 8)>; + +defm t2UXTAB : T2I_bin_rrot<"uxtab", + BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; +defm t2UXTAH : T2I_bin_rrot<"uxtah", + BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; +} + +//===----------------------------------------------------------------------===// // Arithmetic Instructions. // @@ -563,21 +900,24 @@ def t2REVSH : T2I<(outs GPR:$dst), (ins GPR:$src), defm t2CMP : T2I_cmp_is<"cmp", BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; -defm t2CMPnz : T2I_cmp_is<"cmp", - BinOpFrag<(ARMcmpNZ node:$LHS, node:$RHS)>>; +defm t2CMPz : T2I_cmp_is<"cmp", + BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>; defm t2CMN : T2I_cmp_is<"cmn", BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; -defm t2CMNnz : T2I_cmp_is<"cmn", - BinOpFrag<(ARMcmpNZ node:$LHS,(ineg node:$RHS))>>; +defm t2CMNz : T2I_cmp_is<"cmn", + BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>; def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm), (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>; -def : T2Pat<(ARMcmpNZ GPR:$src, t2_so_imm_neg:$imm), +def : T2Pat<(ARMcmpZ GPR:$src, t2_so_imm_neg:$imm), (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>; -// FIXME: TST, TEQ, etc. +defm t2TST : T2I_cmp_is<"tst", + BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>>; +defm t2TEQ : T2I_cmp_is<"teq", + BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>>; // A8.6.27 CBNZ, CBZ - Compare and branch on (non)zero. // Short range conditional branch. Looks awesome for loops. Need to figure @@ -585,6 +925,42 @@ def : T2Pat<(ARMcmpNZ GPR:$src, t2_so_imm_neg:$imm), // FIXME: Conditional moves +//===----------------------------------------------------------------------===// +// Control-Flow Instructions +// + +let isBranch = 1, isTerminator = 1, isBarrier = 1 in { +let isPredicable = 1 in +def t2B : T2XI<(outs), (ins brtarget:$target), + "b $target", + [(br bb:$target)]>; + +let isNotDuplicable = 1, isIndirectBranch = 1 in { +def t2BR_JTr : T2JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id), + "mov pc, $target \n$jt", + [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>; + +def t2BR_JTm : + T2JTI<(outs), + (ins t2addrmode_so_reg:$target, jtblock_operand:$jt, i32imm:$id), + "ldr pc, $target \n$jt", + [(ARMbrjt (i32 (load t2addrmode_so_reg:$target)), tjumptable:$jt, + imm:$id)]>; + +def t2BR_JTadd : + T2JTI<(outs), + (ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id), + "add pc, $target, $idx \n$jt", + [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, imm:$id)]>; +} // isNotDuplicate, isIndirectBranch +} // isBranch, isTerminator, isBarrier + +// FIXME: should be able to write a pattern for ARMBrcond, but can't use +// a two-value operand where a dag node expects two operands. :( +let isBranch = 1, isTerminator = 1 in +def t2Bcc : T2I<(outs), (ins brtarget:$target), + "b", " $target", + [/*(ARMbrcond bb:$target, imm:$cc)*/]>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 0b0e289..66d3df6 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -30,6 +30,11 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// Used to initialized Align, so must precede it. bool isThumb; + /// hasThumb2 - True if the target architecture supports Thumb2. Do not use + /// to determine if function is compiled under Thumb mode, for that use + /// 'isThumb'. + bool hasThumb2; + /// Align - required alignment. ARM functions and Thumb functions with /// constant pools require 4-byte alignment; other Thumb functions /// require only 2-byte alignment. @@ -91,7 +96,8 @@ class ARMFunctionInfo : public MachineFunctionInfo { public: ARMFunctionInfo() : - isThumb(false), + isThumb(false), + hasThumb2(false), Align(2U), VarArgsRegSaveSize(0), HasStackFrame(false), LRSpilledForFarJump(false), R3IsLiveIn(false), @@ -102,6 +108,7 @@ public: explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), + hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()), Align(isThumb ? 1U : 2U), VarArgsRegSaveSize(0), HasStackFrame(false), LRSpilledForFarJump(false), R3IsLiveIn(false), @@ -112,6 +119,7 @@ public: JumpTableUId(0), ConstPoolEntryUId(0) {} bool isThumbFunction() const { return isThumb; } + bool isThumb2Function() const { return isThumb && hasThumb2; } unsigned getAlign() const { return Align; } void setAlign(unsigned a) { Align = a; } diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index bb0cc8f..f809f37 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -31,16 +31,9 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/CommandLine.h" -#include <algorithm> using namespace llvm; -static cl::opt<bool> ThumbRegScavenging("enable-thumb-reg-scavenging", - cl::Hidden, - cl::desc("Enable register scavenging on Thumb")); - -unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum) { +unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum) { using namespace ARM; switch (RegEnum) { case R0: case S0: case D0: return 0; @@ -81,8 +74,8 @@ unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum) { } } -unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum, - bool &isSPVFP) { +unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum, + bool &isSPVFP) { isSPVFP = false; using namespace ARM; @@ -108,12 +101,12 @@ unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum, case PC: case D15: return 15; case S0: case S1: case S2: case S3: - case S4: case S5: case S6: case S7: - case S8: case S9: case S10: case S11: - case S12: case S13: case S14: case S15: - case S16: case S17: case S18: case S19: - case S20: case S21: case S22: case S23: - case S24: case S25: case S26: case S27: + case S4: case S5: case S6: case S7: + case S8: case S9: case S10: case S11: + case S12: case S13: case S14: case S15: + case S16: case S17: case S18: case S19: + case S20: case S21: case S22: case S23: + case S24: case S25: case S26: case S27: case S28: case S29: case S30: case S31: { isSPVFP = true; switch (RegEnum) { @@ -155,13 +148,18 @@ unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum, } } -ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii, - const ARMSubtarget &sti) +ARMBaseRegisterInfo::ARMBaseRegisterInfo(const TargetInstrInfo &tii, + const ARMSubtarget &sti) : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), TII(tii), STI(sti), FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) { } +ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii, + const ARMSubtarget &sti) + : ARMBaseRegisterInfo(tii, sti) { +} + static inline const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) { return MIB.addImm((int64_t)ARMCC::AL).addReg(0); @@ -176,55 +174,22 @@ const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) { /// specified immediate. void ARMRegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + const TargetInstrInfo *TII, DebugLoc dl, unsigned DestReg, int Val, - unsigned Pred, unsigned PredReg, - const TargetInstrInfo *TII, - bool isThumb, - DebugLoc dl) const { + ARMCC::CondCodes Pred, + unsigned PredReg) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); Constant *C = ConstantInt::get(Type::Int32Ty, Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); - if (isThumb) - BuildMI(MBB, MBBI, dl, - TII->get(ARM::tLDRcp),DestReg).addConstantPoolIndex(Idx); - else - BuildMI(MBB, MBBI, dl, TII->get(ARM::LDRcp), DestReg) - .addConstantPoolIndex(Idx) - .addReg(0).addImm(0).addImm(Pred).addReg(PredReg); -} -/// isLowRegister - Returns true if the register is low register r0-r7. -/// -bool ARMRegisterInfo::isLowRegister(unsigned Reg) const { - using namespace ARM; - switch (Reg) { - case R0: case R1: case R2: case R3: - case R4: case R5: case R6: case R7: - return true; - default: - return false; - } -} - -const TargetRegisterClass* -ARMRegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const { - if (STI.isThumb()) { - if (isLowRegister(Reg)) - return ARM::tGPRRegisterClass; - switch (Reg) { - default: - break; - case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: - case ARM::R12: case ARM::SP: case ARM::LR: case ARM::PC: - return ARM::GPRRegisterClass; - } - } - return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT); + BuildMI(MBB, MBBI, dl, TII->get(ARM::LDRcp), DestReg) + .addConstantPoolIndex(Idx) + .addReg(0).addImm(0).addImm(Pred).addReg(PredReg); } const unsigned* -ARMRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { +ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { static const unsigned CalleeSavedRegs[] = { ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8, ARM::R7, ARM::R6, ARM::R5, ARM::R4, @@ -248,7 +213,7 @@ ARMRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } const TargetRegisterClass* const * -ARMRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { +ARMBaseRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { static const TargetRegisterClass * const CalleeSavedRegClasses[] = { &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, @@ -297,7 +262,7 @@ ARMRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { ? DarwinCalleeSavedRegClasses : CalleeSavedRegClasses; } -BitVector ARMRegisterInfo::getReservedRegs(const MachineFunction &MF) const { +BitVector ARMBaseRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // FIXME: avoid re-calculating this everytime. BitVector Reserved(getNumRegs()); Reserved.set(ARM::SP); @@ -311,7 +276,7 @@ BitVector ARMRegisterInfo::getReservedRegs(const MachineFunction &MF) const { } bool -ARMRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const { +ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const { switch (Reg) { default: break; case ARM::SP: @@ -329,16 +294,16 @@ ARMRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const { return false; } -const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const { +const TargetRegisterClass *ARMBaseRegisterInfo::getPointerRegClass() const { return &ARM::GPRRegClass; } /// getAllocationOrder - Returns the register allocation order for a specified /// register class in the form of a pair of TargetRegisterClass iterators. std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator> -ARMRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, - unsigned HintType, unsigned HintReg, - const MachineFunction &MF) const { +ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, + unsigned HintType, unsigned HintReg, + const MachineFunction &MF) const { // Alternative register allocation orders when favoring even / odd registers // of register pairs. @@ -479,8 +444,8 @@ ARMRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, /// ResolveRegAllocHint - Resolves the specified register allocation hint /// to a physical register. Returns the physical register if it is successful. unsigned -ARMRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg, - const MachineFunction &MF) const { +ARMBaseRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg, + const MachineFunction &MF) const { if (Reg == 0 || !isPhysicalRegister(Reg)) return 0; if (Type == 0) @@ -495,8 +460,8 @@ ARMRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg, } void -ARMRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, - MachineFunction &MF) const { +ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, + MachineFunction &MF) const { MachineRegisterInfo *MRI = &MF.getRegInfo(); std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg); if ((Hint.first == (unsigned)ARMRI::RegPairOdd || @@ -516,15 +481,14 @@ ARMRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, bool ARMRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { - const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - return ThumbRegScavenging || !AFI->isThumbFunction(); + return true; } /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. /// -bool ARMRegisterInfo::hasFP(const MachineFunction &MF) const { +bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); return (NoFramePointerElim || MFI->hasVarSizedObjects() || @@ -539,18 +503,13 @@ bool ARMRegisterInfo::hasFP(const MachineFunction &MF) const { bool ARMRegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { const MachineFrameInfo *FFI = MF.getFrameInfo(); unsigned CFSize = FFI->getMaxCallFrameSize(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); // It's not always a good idea to include the call frame as part of the // stack frame. ARM (especially Thumb) has small immediate offset to // address the stack frame. So a large call frame can cause poor codegen // and may even makes it impossible to scavenge a register. - if (AFI->isThumbFunction()) { - if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 - return false; - } else { - if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 - return false; - } + if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 + return false; + return !MF.getFrameInfo()->hasVarSizedObjects(); } @@ -570,14 +529,14 @@ void emitARMRegPlusImmediate(MachineBasicBlock &MBB, unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes); unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); assert(ThisVal && "Didn't extract field correctly"); - + // We will handle these bits from offset, clear them. NumBytes &= ~ThisVal; - + // Get the properly encoded SOImmVal field. int SOImmVal = ARM_AM::getSOImmVal(ThisVal); assert(SOImmVal != -1 && "Bit extraction didn't work?"); - + // Build the new ADD / SUB. BuildMI(MBB, MBBI, dl, TII.get(isSub ? ARM::SUBri : ARM::ADDri), DestReg) .addReg(BaseReg, RegState::Kill).addImm(SOImmVal) @@ -586,204 +545,13 @@ void emitARMRegPlusImmediate(MachineBasicBlock &MBB, } } -/// calcNumMI - Returns the number of instructions required to materialize -/// the specific add / sub r, c instruction. -static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes, - unsigned NumBits, unsigned Scale) { - unsigned NumMIs = 0; - unsigned Chunk = ((1 << NumBits) - 1) * Scale; - - if (Opc == ARM::tADDrSPi) { - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - NumMIs++; - NumBits = 8; - Scale = 1; // Followed by a number of tADDi8. - Chunk = ((1 << NumBits) - 1) * Scale; - } - - NumMIs += Bytes / Chunk; - if ((Bytes % Chunk) != 0) - NumMIs++; - if (ExtraOpc) - NumMIs++; - return NumMIs; -} - -/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize -/// a destreg = basereg + immediate in Thumb code. Materialize the immediate -/// in a register using mov / mvn sequences or load the immediate from a -/// constpool entry. -static -void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned DestReg, unsigned BaseReg, - int NumBytes, bool CanChangeCC, - const TargetInstrInfo &TII, - const ARMRegisterInfo& MRI, - DebugLoc dl) { - bool isHigh = !MRI.isLowRegister(DestReg) || - (BaseReg != 0 && !MRI.isLowRegister(BaseReg)); - bool isSub = false; - // Subtract doesn't have high register version. Load the negative value - // if either base or dest register is a high register. Also, if do not - // issue sub as part of the sequence if condition register is to be - // preserved. - if (NumBytes < 0 && !isHigh && CanChangeCC) { - isSub = true; - NumBytes = -NumBytes; - } - unsigned LdReg = DestReg; - if (DestReg == ARM::SP) { - assert(BaseReg == ARM::SP && "Unexpected!"); - LdReg = ARM::R3; - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) - .addReg(ARM::R3, RegState::Kill); - } - - if (NumBytes <= 255 && NumBytes >= 0) - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); - else if (NumBytes < 0 && NumBytes >= -255) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); - BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), LdReg) - .addReg(LdReg, RegState::Kill); - } else - MRI.emitLoadConstPool(MBB, MBBI, LdReg, NumBytes, ARMCC::AL, 0, &TII, - true, dl); - - // Emit add / sub. - int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr); - const MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, - TII.get(Opc), DestReg); - if (DestReg == ARM::SP || isSub) - MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill); - else - MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); - if (DestReg == ARM::SP) - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVhir2lor), ARM::R3) - .addReg(ARM::R12, RegState::Kill); -} - -/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize -/// a destreg = basereg + immediate in Thumb code. -static -void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned DestReg, unsigned BaseReg, - int NumBytes, const TargetInstrInfo &TII, - const ARMRegisterInfo& MRI, - DebugLoc dl) { - bool isSub = NumBytes < 0; - unsigned Bytes = (unsigned)NumBytes; - if (isSub) Bytes = -NumBytes; - bool isMul4 = (Bytes & 3) == 0; - bool isTwoAddr = false; - bool DstNotEqBase = false; - unsigned NumBits = 1; - unsigned Scale = 1; - int Opc = 0; - int ExtraOpc = 0; - - if (DestReg == BaseReg && BaseReg == ARM::SP) { - assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); - NumBits = 7; - Scale = 4; - Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; - isTwoAddr = true; - } else if (!isSub && BaseReg == ARM::SP) { - // r1 = add sp, 403 - // => - // r1 = add sp, 100 * 4 - // r1 = add r1, 3 - if (!isMul4) { - Bytes &= ~3; - ExtraOpc = ARM::tADDi3; - } - NumBits = 8; - Scale = 4; - Opc = ARM::tADDrSPi; - } else { - // sp = sub sp, c - // r1 = sub sp, c - // r8 = sub sp, c - if (DestReg != BaseReg) - DstNotEqBase = true; - NumBits = 8; - Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; - isTwoAddr = true; - } - - unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale); - unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2; - if (NumMIs > Threshold) { - // This will expand into too many instructions. Load the immediate from a - // constpool entry. - emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII, - MRI, dl); - return; - } - - if (DstNotEqBase) { - if (MRI.isLowRegister(DestReg) && MRI.isLowRegister(BaseReg)) { - // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7) - unsigned Chunk = (1 << 3) - 1; - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - BuildMI(MBB, MBBI, dl,TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg) - .addReg(BaseReg, RegState::Kill).addImm(ThisVal); - } else { - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) - .addReg(BaseReg, RegState::Kill); - } - BaseReg = DestReg; - } - - unsigned Chunk = ((1 << NumBits) - 1) * Scale; - while (Bytes) { - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - ThisVal /= Scale; - // Build the new tADD / tSUB. - if (isTwoAddr) - BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(DestReg).addImm(ThisVal); - else { - bool isKill = BaseReg != ARM::SP; - BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); - BaseReg = DestReg; - - if (Opc == ARM::tADDrSPi) { - // r4 = add sp, imm - // r4 = add r4, imm - // ... - NumBits = 8; - Scale = 1; - Chunk = ((1 << NumBits) - 1) * Scale; - Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; - isTwoAddr = true; - } - } - } - - if (ExtraOpc) - BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg) - .addReg(DestReg, RegState::Kill) - .addImm(((unsigned)NumBytes) & 3); -} - -static -void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, - bool isThumb, const TargetInstrInfo &TII, - const ARMRegisterInfo& MRI, - DebugLoc dl) { - if (isThumb) - emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII, - MRI, dl); - else - emitARMRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, - Pred, PredReg, TII, dl); +static void +emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + const TargetInstrInfo &TII, DebugLoc dl, + int NumBytes, + ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { + emitARMRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, + Pred, PredReg, TII, dl); } void ARMRegisterInfo:: @@ -797,7 +565,6 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, DebugLoc dl = Old->getDebugLoc(); unsigned Amount = Old->getOperand(0).getImm(); if (Amount != 0) { - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. @@ -806,46 +573,22 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // Replace the pseudo instruction with a new instruction... unsigned Opc = Old->getOpcode(); - bool isThumb = AFI->isThumbFunction(); - ARMCC::CondCodes Pred = isThumb - ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(1).getImm(); + ARMCC::CondCodes Pred = (ARMCC::CondCodes)Old->getOperand(1).getImm(); if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. - unsigned PredReg = isThumb ? 0 : Old->getOperand(2).getReg(); - emitSPUpdate(MBB, I, -Amount, Pred, PredReg, isThumb, TII, *this, dl); + unsigned PredReg = Old->getOperand(2).getReg(); + emitSPUpdate(MBB, I, TII, dl, -Amount, Pred, PredReg); } else { // Note: PredReg is operand 3 for ADJCALLSTACKUP. - unsigned PredReg = isThumb ? 0 : Old->getOperand(3).getReg(); + unsigned PredReg = Old->getOperand(3).getReg(); assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); - emitSPUpdate(MBB, I, Amount, Pred, PredReg, isThumb, TII, *this, dl); + emitSPUpdate(MBB, I, TII, dl, Amount, Pred, PredReg); } } } MBB.erase(I); } -/// emitThumbConstant - Emit a series of instructions to materialize a -/// constant. -static void emitThumbConstant(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned DestReg, int Imm, - const TargetInstrInfo &TII, - const ARMRegisterInfo& MRI, - DebugLoc dl) { - bool isSub = Imm < 0; - if (isSub) Imm = -Imm; - - int Chunk = (1 << 8) - 1; - int ThisVal = (Imm > Chunk) ? Chunk : Imm; - Imm -= ThisVal; - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal); - if (Imm > 0) - emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl); - if (isSub) - BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), DestReg) - .addReg(DestReg, RegState::Kill); -} - /// findScratchRegister - Find a 'free' ARM register. If register scavenger /// is not being used, R12 is available. Otherwise, try for a call-clobbered /// register first and then a spilled callee-saved register if that fails. @@ -868,17 +611,16 @@ void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - bool isThumb = AFI->isThumbFunction(); DebugLoc dl = MI.getDebugLoc(); while (!MI.getOperand(i).isFI()) { ++i; assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } - + unsigned FrameReg = ARM::SP; int FrameIndex = MI.getOperand(i).getIndex(); - int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + MF.getFrameInfo()->getStackSize() + SPAdj; if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex)) @@ -926,86 +668,20 @@ void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i+1).ChangeToImmediate(ImmedOffset); return; } - + // Otherwise, we fallback to common code below to form the imm offset with // a sequence of ADDri instructions. First though, pull as much of the imm // into this ADDri as possible. unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset); unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt); - + // We will handle these bits from offset, clear them. Offset &= ~ThisImmVal; - + // Get the properly encoded SOImmVal field. int ThisSOImmVal = ARM_AM::getSOImmVal(ThisImmVal); - assert(ThisSOImmVal != -1 && "Bit extraction didn't work?"); + assert(ThisSOImmVal != -1 && "Bit extraction didn't work?"); MI.getOperand(i+1).ChangeToImmediate(ThisSOImmVal); - } else if (Opcode == ARM::tADDrSPi) { - Offset += MI.getOperand(i+1).getImm(); - - // Can't use tADDrSPi if it's based off the frame pointer. - unsigned NumBits = 0; - unsigned Scale = 1; - if (FrameReg != ARM::SP) { - Opcode = ARM::tADDi3; - MI.setDesc(TII.get(ARM::tADDi3)); - NumBits = 3; - } else { - NumBits = 8; - Scale = 4; - assert((Offset & 3) == 0 && - "Thumb add/sub sp, #imm immediate must be multiple of 4!"); - } - - if (Offset == 0) { - // Turn it into a move. - MI.setDesc(TII.get(ARM::tMOVhir2lor)); - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.RemoveOperand(i+1); - return; - } - - // Common case: small offset, fits into instruction. - unsigned Mask = (1 << NumBits) - 1; - if (((Offset / Scale) & ~Mask) == 0) { - // Replace the FrameIndex with sp / fp - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Offset / Scale); - return; - } - - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned Bytes = (Offset > 0) ? Offset : -Offset; - unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale); - // MI would expand into a large number of instructions. Don't try to - // simplify the immediate. - if (NumMIs > 2) { - emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII, - *this, dl); - MBB.erase(II); - return; - } - - if (Offset > 0) { - // Translate r0 = add sp, imm to - // r0 = add sp, 255*4 - // r0 = add r0, (imm - 255*4) - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Mask); - Offset = (Offset - Mask * Scale); - MachineBasicBlock::iterator NII = next(II); - emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII, - *this, dl); - } else { - // Translate r0 = add sp, -imm to - // r0 = -imm (this is then translated into a series of instructons) - // r0 = add r0, sp - emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl); - MI.setDesc(TII.get(ARM::tADDhirr)); - MI.getOperand(i).ChangeToRegister(DestReg, false, false, true); - MI.getOperand(i+1).ChangeToRegister(FrameReg, false); - } - return; } else { unsigned ImmIdx = 0; int InstrOffs = 0; @@ -1037,13 +713,6 @@ void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Scale = 4; break; } - case ARMII::AddrModeTs: { - ImmIdx = i+1; - InstrOffs = MI.getOperand(ImmIdx).getImm(); - NumBits = (FrameReg == ARM::SP) ? 8 : 5; - Scale = 4; - break; - } default: assert(0 && "Unsupported addressing mode!"); abort(); @@ -1052,7 +721,7 @@ void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset += InstrOffs * Scale; assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!"); - if (Offset < 0 && !isThumb) { + if (Offset < 0) { Offset = -Offset; isSub = true; } @@ -1070,121 +739,34 @@ void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, return; } - bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill; - if (AddrMode == ARMII::AddrModeTs) { - // Thumb tLDRspi, tSTRspi. These will change to instructions that use - // a different base register. - NumBits = 5; - Mask = (1 << NumBits) - 1; - } - // If this is a thumb spill / restore, we will be using a constpool load to - // materialize the offset. - if (AddrMode == ARMII::AddrModeTs && isThumSpillRestore) - ImmOp.ChangeToImmediate(0); - else { - // Otherwise, it didn't fit. Pull in what we can to simplify the immed. - ImmedOffset = ImmedOffset & Mask; - if (isSub) - ImmedOffset |= 1 << NumBits; - ImmOp.ChangeToImmediate(ImmedOffset); - Offset &= ~(Mask*Scale); - } + // Otherwise, it didn't fit. Pull in what we can to simplify the immed. + ImmedOffset = ImmedOffset & Mask; + if (isSub) + ImmedOffset |= 1 << NumBits; + ImmOp.ChangeToImmediate(ImmedOffset); + Offset &= ~(Mask*Scale); } - + // If we get here, the immediate doesn't fit into the instruction. We folded // as much as possible above, handle the rest, providing a register that is // SP+LargeImm. assert(Offset && "This code isn't needed if offset already handled!"); - if (isThumb) { - if (Desc.mayLoad()) { - // Use the destination register to materialize sp + offset. - unsigned TmpReg = MI.getOperand(0).getReg(); - bool UseRR = false; - if (Opcode == ARM::tRestore) { - if (FrameReg == ARM::SP) - emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, - Offset, false, TII, *this, dl); - else { - emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, &TII, - true, dl); - UseRR = true; - } - } else - emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, - *this, dl); - MI.setDesc(TII.get(ARM::tLDR)); - MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); - if (UseRR) - // Use [reg, reg] addrmode. - MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); - else // tLDR has an extra register operand. - MI.addOperand(MachineOperand::CreateReg(0, false)); - } else if (Desc.mayStore()) { - // FIXME! This is horrific!!! We need register scavenging. - // Our temporary workaround has marked r3 unavailable. Of course, r3 is - // also a ABI register so it's possible that is is the register that is - // being storing here. If that's the case, we do the following: - // r12 = r2 - // Use r2 to materialize sp + offset - // str r3, r2 - // r2 = r12 - unsigned ValReg = MI.getOperand(0).getReg(); - unsigned TmpReg = ARM::R3; - bool UseRR = false; - if (ValReg == ARM::R3) { - BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) - .addReg(ARM::R2, RegState::Kill); - TmpReg = ARM::R2; - } - if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) - BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) - .addReg(ARM::R3, RegState::Kill); - if (Opcode == ARM::tSpill) { - if (FrameReg == ARM::SP) - emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, - Offset, false, TII, *this, dl); - else { - emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, &TII, - true, dl); - UseRR = true; - } - } else - emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, - *this, dl); - MI.setDesc(TII.get(ARM::tSTR)); - MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); - if (UseRR) // Use [reg, reg] addrmode. - MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); - else // tSTR has an extra register operand. - MI.addOperand(MachineOperand::CreateReg(0, false)); - - MachineBasicBlock::iterator NII = next(II); - if (ValReg == ARM::R3) - BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R2) - .addReg(ARM::R12, RegState::Kill); - if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) - BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R3) - .addReg(ARM::R12, RegState::Kill); - } else - assert(false && "Unexpected opcode!"); - } else { - // Insert a set of r12 with the full address: r12 = sp + offset - // If the offset we have is too large to fit into the instruction, we need - // to form it with a series of ADDri's. Do this by taking 8-bit chunks - // out of 'Offset'. - unsigned ScratchReg = findScratchRegister(RS, &ARM::GPRRegClass, AFI); - if (ScratchReg == 0) - // No register is "free". Scavenge a register. - ScratchReg = RS->scavengeRegister(&ARM::GPRRegClass, II, SPAdj); - int PIdx = MI.findFirstPredOperandIdx(); - ARMCC::CondCodes Pred = (PIdx == -1) - ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); - unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg(); - emitARMRegPlusImmediate(MBB, II, ScratchReg, FrameReg, - isSub ? -Offset : Offset, Pred, PredReg, TII, dl); - MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); - } + // Insert a set of r12 with the full address: r12 = sp + offset + // If the offset we have is too large to fit into the instruction, we need + // to form it with a series of ADDri's. Do this by taking 8-bit chunks + // out of 'Offset'. + unsigned ScratchReg = findScratchRegister(RS, &ARM::GPRRegClass, AFI); + if (ScratchReg == 0) + // No register is "free". Scavenge a register. + ScratchReg = RS->scavengeRegister(&ARM::GPRRegClass, II, SPAdj); + int PIdx = MI.findFirstPredOperandIdx(); + ARMCC::CondCodes Pred = (PIdx == -1) + ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); + unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg(); + emitARMRegPlusImmediate(MBB, II, ScratchReg, FrameReg, + isSub ? -Offset : Offset, Pred, PredReg, TII, dl); + MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); } static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) { @@ -1206,8 +788,8 @@ static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) { } void -ARMRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { +ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { // This tells PEI to spill the FP as if it is any other callee-save register // to take advantage the eliminateFrameIndex machinery. This also ensures it // is spilled in the order specified by getCalleeSavedRegs() to make it easier @@ -1266,7 +848,7 @@ ARMRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, default: break; } - } else { + } else { if (!STI.isTargetDarwin()) { UnspilledCS1GPRs.push_back(Reg); continue; @@ -1332,7 +914,8 @@ ARMRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { unsigned Reg = UnspilledCS1GPRs[i]; // Don't spiil high register if the function is thumb - if (!AFI->isThumbFunction() || isLowRegister(Reg) || Reg == ARM::LR) { + if (!AFI->isThumbFunction() || + isARMLowRegister(Reg) || Reg == ARM::LR) { MF.getRegInfo().setPhysRegUsed(Reg); AFI->setCSRegisterIsSpilled(Reg); if (!isReservedReg(MF, Reg)) @@ -1351,7 +934,7 @@ ARMRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } // Estimate if we might need to scavenge a register at some point in order - // to materialize a stack offset. If so, either spill one additiona + // to materialize a stack offset. If so, either spill one additional // callee-saved register or reserve a special spill slot to facilitate // register scavenging. if (RS && !ExtraCSSpill && !AFI->isThumbFunction()) { @@ -1460,40 +1043,23 @@ void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const { MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - bool isThumb = AFI->isThumbFunction(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); - if (isThumb) { - // Check if R3 is live in. It might have to be used as a scratch register. - for (MachineRegisterInfo::livein_iterator I =MF.getRegInfo().livein_begin(), - E = MF.getRegInfo().livein_end(); I != E; ++I) { - if (I->first == ARM::R3) { - AFI->setR3IsLiveIn(true); - break; - } - } - - // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. - NumBytes = (NumBytes + 3) & ~3; - MFI->setStackSize(NumBytes); - } - // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; if (VARegSaveSize) - emitSPUpdate(MBB, MBBI, -VARegSaveSize, ARMCC::AL, 0, isThumb, TII, - *this, dl); + emitSPUpdate(MBB, MBBI, TII, dl, -VARegSaveSize); if (!AFI->hasStackFrame()) { if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl); + emitSPUpdate(MBB, MBBI, TII, dl, -NumBytes); return; } @@ -1531,34 +1097,25 @@ void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const { } } - if (!isThumb) { - // Build the new SUBri to adjust SP for integer callee-save spill area 1. - emitSPUpdate(MBB, MBBI, -GPRCS1Size, ARMCC::AL, 0, isThumb, TII, *this, dl); - movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 1, STI); - } else if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { - ++MBBI; - if (MBBI != MBB.end()) - dl = MBBI->getDebugLoc(); - } + // Build the new SUBri to adjust SP for integer callee-save spill area 1. + emitSPUpdate(MBB, MBBI, TII, dl, -GPRCS1Size); + movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 1, STI); // Darwin ABI requires FP to point to the stack slot that contains the // previous FP. if (STI.isTargetDarwin() || hasFP(MF)) { MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(isThumb ? ARM::tADDrSPi : ARM::ADDri), - FramePtr) + BuildMI(MBB, MBBI, dl, TII.get(ARM::ADDri), FramePtr) .addFrameIndex(FramePtrSpillFI).addImm(0); - if (!isThumb) AddDefaultCC(AddDefaultPred(MIB)); + AddDefaultCC(AddDefaultPred(MIB)); } - if (!isThumb) { - // Build the new SUBri to adjust SP for integer callee-save spill area 2. - emitSPUpdate(MBB, MBBI, -GPRCS2Size, ARMCC::AL, 0, false, TII, *this, dl); + // Build the new SUBri to adjust SP for integer callee-save spill area 2. + emitSPUpdate(MBB, MBBI, TII, dl, -GPRCS2Size); - // Build the new SUBri to adjust SP for FP callee-save spill area. - movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 2, STI); - emitSPUpdate(MBB, MBBI, -DPRCSSize, ARMCC::AL, 0, false, TII, *this, dl); - } + // Build the new SUBri to adjust SP for FP callee-save spill area. + movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 2, STI); + emitSPUpdate(MBB, MBBI, TII, dl, -DPRCSSize); // Determine starting offsets of spill areas. unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); @@ -1568,16 +1125,15 @@ void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const { AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); - + NumBytes = DPRCSOffset; if (NumBytes) { // Insert it after all the callee-save spills. - if (!isThumb) - movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 3, STI); - emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl); + movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 3, STI); + emitSPUpdate(MBB, MBBI, TII, dl, -NumBytes); } - if(STI.isTargetELF() && hasFP(MF)) { + if (STI.isTargetELF() && hasFP(MF)) { MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); } @@ -1596,8 +1152,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { return ((MI->getOpcode() == ARM::FLDD || - MI->getOpcode() == ARM::LDR || - MI->getOpcode() == ARM::tRestore) && + MI->getOpcode() == ARM::LDR) && MI->getOperand(1).isFI() && isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)); } @@ -1605,20 +1160,17 @@ static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { void ARMRegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = prior(MBB.end()); - assert((MBBI->getOpcode() == ARM::BX_RET || - MBBI->getOpcode() == ARM::tBX_RET || - MBBI->getOpcode() == ARM::tPOP_RET) && + assert(MBBI->getOpcode() == ARM::BX_RET && "Can only insert epilog into returning blocks"); DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - bool isThumb = AFI->isThumbFunction(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); int NumBytes = (int)MFI->getStackSize(); if (!AFI->hasStackFrame()) { if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl); + emitSPUpdate(MBB, MBBI, TII, dl, NumBytes); } else { // Unwind MBBI to point to first LDR / FLDD. const unsigned *CSRegs = getCalleeSavedRegs(); @@ -1634,110 +1186,73 @@ void ARMRegisterInfo::emitEpilogue(MachineFunction &MF, NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize()); - if (isThumb) { - if (hasFP(MF)) { - NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; - // Reset SP based on frame pointer only if the stack frame extends beyond - // frame pointer stack slot or target is ELF and the function has FP. + + // Darwin ABI requires FP to point to the stack slot that contains the + // previous FP. + if ((STI.isTargetDarwin() && NumBytes) || hasFP(MF)) { + NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; + // Reset SP based on frame pointer only if the stack frame extends beyond + // frame pointer stack slot or target is ELF and the function has FP. + if (AFI->getGPRCalleeSavedArea2Size() || + AFI->getDPRCalleeSavedAreaSize() || + AFI->getDPRCalleeSavedAreaOffset()|| + hasFP(MF)) { if (NumBytes) - emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes, - TII, *this, dl); + BuildMI(MBB, MBBI, dl, TII.get(ARM::SUBri), ARM::SP).addReg(FramePtr) + .addImm(NumBytes) + .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); else - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::SP) - .addReg(FramePtr); - } else { - if (MBBI->getOpcode() == ARM::tBX_RET && - &MBB.front() != MBBI && - prior(MBBI)->getOpcode() == ARM::tPOP) { - MachineBasicBlock::iterator PMBBI = prior(MBBI); - emitSPUpdate(MBB, PMBBI, NumBytes, ARMCC::AL, 0, isThumb, TII, - *this, dl); - } else - emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII, - *this, dl); - } - } else { - // Darwin ABI requires FP to point to the stack slot that contains the - // previous FP. - if ((STI.isTargetDarwin() && NumBytes) || hasFP(MF)) { - NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; - // Reset SP based on frame pointer only if the stack frame extends beyond - // frame pointer stack slot or target is ELF and the function has FP. - if (AFI->getGPRCalleeSavedArea2Size() || - AFI->getDPRCalleeSavedAreaSize() || - AFI->getDPRCalleeSavedAreaOffset()|| - hasFP(MF)) { - if (NumBytes) - BuildMI(MBB, MBBI, dl, TII.get(ARM::SUBri), ARM::SP).addReg(FramePtr) - .addImm(NumBytes) - .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); - else - BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP).addReg(FramePtr) - .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); - } - } else if (NumBytes) { - emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, false, TII, *this, dl); + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP).addReg(FramePtr) + .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); } + } else if (NumBytes) { + emitSPUpdate(MBB, MBBI, TII, dl, NumBytes); + } - // Move SP to start of integer callee save spill area 2. - movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 3, STI); - emitSPUpdate(MBB, MBBI, AFI->getDPRCalleeSavedAreaSize(), ARMCC::AL, 0, - false, TII, *this, dl); + // Move SP to start of integer callee save spill area 2. + movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 3, STI); + emitSPUpdate(MBB, MBBI, TII, dl, AFI->getDPRCalleeSavedAreaSize()); - // Move SP to start of integer callee save spill area 1. - movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 2, STI); - emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea2Size(), ARMCC::AL, 0, - false, TII, *this, dl); + // Move SP to start of integer callee save spill area 1. + movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 2, STI); + emitSPUpdate(MBB, MBBI, TII, dl, AFI->getGPRCalleeSavedArea2Size()); - // Move SP to SP upon entry to the function. - movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 1, STI); - emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea1Size(), ARMCC::AL, 0, - false, TII, *this, dl); - } + // Move SP to SP upon entry to the function. + movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 1, STI); + emitSPUpdate(MBB, MBBI, TII, dl, AFI->getGPRCalleeSavedArea1Size()); } - if (VARegSaveSize) { - if (isThumb) - // Epilogue for vararg functions: pop LR to R3 and branch off it. - // FIXME: Verify this is still ok when R3 is no longer being reserved. - BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)).addReg(ARM::R3); - - emitSPUpdate(MBB, MBBI, VARegSaveSize, ARMCC::AL, 0, isThumb, TII, - *this, dl); + if (VARegSaveSize) + emitSPUpdate(MBB, MBBI, TII, dl, VARegSaveSize); - if (isThumb) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3); - MBB.erase(MBBI); - } - } } -unsigned ARMRegisterInfo::getRARegister() const { +unsigned ARMBaseRegisterInfo::getRARegister() const { return ARM::LR; } -unsigned ARMRegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned ARMBaseRegisterInfo::getFrameRegister(MachineFunction &MF) const { if (STI.isTargetDarwin() || hasFP(MF)) return FramePtr; return ARM::SP; } -unsigned ARMRegisterInfo::getEHExceptionRegister() const { +unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const { assert(0 && "What is the exception register"); return 0; } -unsigned ARMRegisterInfo::getEHHandlerRegister() const { +unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const { assert(0 && "What is the exception handler register"); return 0; } -int ARMRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { +int ARMBaseRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); } -unsigned ARMRegisterInfo::getRegisterPairEven(unsigned Reg, - const MachineFunction &MF) const { +unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, + const MachineFunction &MF) const { switch (Reg) { default: break; // Return 0 if either register of the pair is a special register. @@ -1810,7 +1325,7 @@ unsigned ARMRegisterInfo::getRegisterPairEven(unsigned Reg, return 0; } -unsigned ARMRegisterInfo::getRegisterPairOdd(unsigned Reg, +unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const { switch (Reg) { default: break; diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index e8f4fd8..7fe075a 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -14,6 +14,7 @@ #ifndef ARMREGISTERINFO_H #define ARMREGISTERINFO_H +#include "ARM.h" #include "llvm/Target/TargetRegisterInfo.h" #include "ARMGenRegisterInfo.h.inc" @@ -30,21 +31,28 @@ namespace ARMRI { }; } -struct ARMRegisterInfo : public ARMGenRegisterInfo { +/// isARMLowRegister - Returns true if the register is low register r0-r7. +/// +static inline bool isARMLowRegister(unsigned Reg) { + using namespace ARM; + switch (Reg) { + case R0: case R1: case R2: case R3: + case R4: case R5: case R6: case R7: + return true; + default: + return false; + } +} + +struct ARMBaseRegisterInfo : public ARMGenRegisterInfo { +protected: const TargetInstrInfo &TII; const ARMSubtarget &STI; + /// FramePtr - ARM physical register used as frame ptr. + unsigned FramePtr; public: - ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); - - /// emitLoadConstPool - Emits a load from constpool to materialize the - /// specified immediate. - void emitLoadConstPool(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned DestReg, int Val, - unsigned Pred, unsigned PredReg, - const TargetInstrInfo *TII, bool isThumb, - DebugLoc dl) const; + ARMBaseRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); /// getRegisterNumbering - Given the enum value for some register, e.g. /// ARM::LR, return the number that it corresponds to (e.g. 14). @@ -55,8 +63,6 @@ public: static unsigned getRegisterNumbering(unsigned RegEnum, bool &isSPVFP); /// Code Generation virtual methods... - const TargetRegisterClass * - getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const; const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const TargetRegisterClass* const* @@ -79,25 +85,11 @@ public: void UpdateRegAllocHint(unsigned Reg, unsigned NewReg, MachineFunction &MF) const; - bool requiresRegisterScavenging(const MachineFunction &MF) const; - bool hasFP(const MachineFunction &MF) const; - bool hasReservedCallFrame(MachineFunction &MF) const; - - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - // Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(MachineFunction &MF) const; @@ -107,17 +99,44 @@ public: unsigned getEHHandlerRegister() const; int getDwarfRegNum(unsigned RegNum, bool isEH) const; - + bool isLowRegister(unsigned Reg) const; private: - /// FramePtr - ARM physical register used as frame ptr. - unsigned FramePtr; - unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const; unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const; +}; + +struct ARMRegisterInfo : public ARMBaseRegisterInfo { +public: + ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); + + /// emitLoadConstPool - Emits a load from constpool to materialize the + /// specified immediate. + void emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + const TargetInstrInfo *TII, DebugLoc dl, + unsigned DestReg, int Val, + ARMCC::CondCodes Pred = ARMCC::AL, + unsigned PredReg = 0) const; + + /// Code Generation virtual methods... + bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; + bool requiresRegisterScavenging(const MachineFunction &MF) const; + + bool hasReservedCallFrame(MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; + + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index c3cc7ff..5110b31 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -108,7 +108,8 @@ protected: bool isThumb() const { return IsThumb; } bool isThumb1Only() const { return IsThumb && (ThumbMode == Thumb1); } - bool hasThumb2() const { return IsThumb && (ThumbMode >= Thumb2); } + bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); } + bool hasThumb2() const { return ThumbMode >= Thumb2; } bool isR9Reserved() const { return IsR9Reserved; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index f7b8215..2344733 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -95,13 +95,18 @@ ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS) } ThumbTargetMachine::ThumbTargetMachine(const Module &M, const std::string &FS) - : ARMBaseTargetMachine(M, FS, true), InstrInfo(Subtarget), + : ARMBaseTargetMachine(M, FS, true), DataLayout(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:32-i64:32:32-" "i16:16:32-i8:8:32-i1:8:32-a:0:32") : std::string("e-p:32:32-f64:64:64-i64:64:64-" "i16:16:32-i8:8:32-i1:8:32-a:0:32")), TLInfo(*this) { + // Create the approriate type of Thumb InstrInfo + if (Subtarget.hasThumb2()) + InstrInfo = new Thumb2InstrInfo(Subtarget); + else + InstrInfo = new Thumb1InstrInfo(Subtarget); } unsigned ARMTargetMachine::getJITMatchQuality() { @@ -179,7 +184,7 @@ bool ARMBaseTargetMachine::addAssemblyEmitter(PassManagerBase &PM, // Output assembly language. assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + PM.add(AsmPrinterCtor(Out, *this, Verbose)); return false; } @@ -198,7 +203,7 @@ bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -217,7 +222,7 @@ bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -232,7 +237,7 @@ bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -247,7 +252,7 @@ bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 0b49b92..a0df54d 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -22,7 +22,8 @@ #include "ARMJITInfo.h" #include "ARMSubtarget.h" #include "ARMISelLowering.h" -#include "ThumbInstrInfo.h" +#include "Thumb1InstrInfo.h" +#include "Thumb2InstrInfo.h" namespace llvm { @@ -43,7 +44,6 @@ protected: // set this functions to ctor pointer at startup time if they are linked in. typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, ARMBaseTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); static AsmPrinterCtorFn AsmPrinterCtor; @@ -112,23 +112,27 @@ public: }; /// ThumbTargetMachine - Thumb target machine. +/// Due to the way architectures are handled, this represents both +/// Thumb-1 and Thumb-2. /// class ThumbTargetMachine : public ARMBaseTargetMachine { - ThumbInstrInfo InstrInfo; - const TargetData DataLayout; // Calculates type size & alignment + ARMBaseInstrInfo *InstrInfo; // either Thumb1InstrInfo or Thumb2InstrInfo + const TargetData DataLayout; // Calculates type size & alignment ARMTargetLowering TLInfo; public: ThumbTargetMachine(const Module &M, const std::string &FS); - virtual const ARMRegisterInfo *getRegisterInfo() const { - return &InstrInfo.getRegisterInfo(); + /// returns either Thumb1RegisterInfo of Thumb2RegisterInfo + virtual const ARMBaseRegisterInfo *getRegisterInfo() const { + return &InstrInfo->getRegisterInfo(); } - virtual ARMTargetLowering *getTargetLowering() const { + virtual ARMTargetLowering *getTargetLowering() const { return const_cast<ARMTargetLowering*>(&TLInfo); } - virtual const ThumbInstrInfo *getInstrInfo() const { return &InstrInfo; } + /// returns either Thumb1InstrInfo or Thumb2InstrInfo + virtual const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo; } virtual const TargetData *getTargetData() const { return &DataLayout; } static unsigned getJITMatchQuality(); diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 400f628a..434a19a 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -82,9 +82,8 @@ namespace { bool InCPMode; public: explicit ARMAsmPrinter(raw_ostream &O, TargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V), DW(0), AFI(NULL), MCP(NULL), + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V), DW(0), AFI(NULL), MCP(NULL), InCPMode(false) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); } @@ -93,45 +92,50 @@ namespace { return "ARM Assembly Printer"; } - void printOperand(const MachineInstr *MI, int opNum, + void printOperand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); - void printSOImmOperand(const MachineInstr *MI, int opNum); - void printSOImm2PartOperand(const MachineInstr *MI, int opNum); - void printSORegOperand(const MachineInstr *MI, int opNum); - void printAddrMode2Operand(const MachineInstr *MI, int OpNo); - void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo); - void printAddrMode3Operand(const MachineInstr *MI, int OpNo); - void printAddrMode3OffsetOperand(const MachineInstr *MI, int OpNo); - void printAddrMode4Operand(const MachineInstr *MI, int OpNo, + void printSOImmOperand(const MachineInstr *MI, int OpNum); + void printSOImm2PartOperand(const MachineInstr *MI, int OpNum); + void printSORegOperand(const MachineInstr *MI, int OpNum); + void printAddrMode2Operand(const MachineInstr *MI, int OpNum); + void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNum); + void printAddrMode3Operand(const MachineInstr *MI, int OpNum); + void printAddrMode3OffsetOperand(const MachineInstr *MI, int OpNum); + void printAddrMode4Operand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); - void printAddrMode5Operand(const MachineInstr *MI, int OpNo, + void printAddrMode5Operand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); - void printAddrModePCOperand(const MachineInstr *MI, int OpNo, + void printAddrMode6Operand(const MachineInstr *MI, int OpNum); + void printAddrModePCOperand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); - void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNo); + void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum); - void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNo); - void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNo, + void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNum); + void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNum, unsigned Scale); - void printThumbAddrModeS1Operand(const MachineInstr *MI, int OpNo); - void printThumbAddrModeS2Operand(const MachineInstr *MI, int OpNo); - void printThumbAddrModeS4Operand(const MachineInstr *MI, int OpNo); - void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNo); + void printThumbAddrModeS1Operand(const MachineInstr *MI, int OpNum); + void printThumbAddrModeS2Operand(const MachineInstr *MI, int OpNum); + void printThumbAddrModeS4Operand(const MachineInstr *MI, int OpNum); + void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNum); - void printT2SOImmOperand(const MachineInstr *MI, int opNum); + void printT2SOImmOperand(const MachineInstr *MI, int OpNum); void printT2SOOperand(const MachineInstr *MI, int OpNum); - - void printPredicateOperand(const MachineInstr *MI, int opNum); - void printSBitModifierOperand(const MachineInstr *MI, int opNum); - void printPCLabel(const MachineInstr *MI, int opNum); - void printRegisterList(const MachineInstr *MI, int opNum); - void printCPInstOperand(const MachineInstr *MI, int opNum, + void printT2AddrModeImm12Operand(const MachineInstr *MI, int OpNum); + void printT2AddrModeImm8Operand(const MachineInstr *MI, int OpNum); + void printT2AddrModeImm8OffsetOperand(const MachineInstr *MI, int OpNum); + void printT2AddrModeSoRegOperand(const MachineInstr *MI, int OpNum); + + void printPredicateOperand(const MachineInstr *MI, int OpNum); + void printSBitModifierOperand(const MachineInstr *MI, int OpNum); + void printPCLabel(const MachineInstr *MI, int OpNum); + void printRegisterList(const MachineInstr *MI, int OpNum); + void printCPInstOperand(const MachineInstr *MI, int OpNum, const char *Modifier); - void printJTBlockOperand(const MachineInstr *MI, int opNum); + void printJTBlockOperand(const MachineInstr *MI, int OpNum); - virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode); - virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode); @@ -232,15 +236,16 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { printVisibility(CurrentFnName, F->getVisibility()); if (AFI->isThumbFunction()) { - EmitAlignment(1, F, AFI->getAlign()); + EmitAlignment(MF.getAlignment(), F, AFI->getAlign()); O << "\t.code\t16\n"; O << "\t.thumb_func"; if (Subtarget->isTargetDarwin()) O << "\t" << CurrentFnName; O << "\n"; InCPMode = false; - } else - EmitAlignment(2, F); + } else { + EmitAlignment(MF.getAlignment(), F); + } O << CurrentFnName << ":\n"; // Emit pre-function debug information. @@ -282,9 +287,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { return false; } -void ARMAsmPrinter::printOperand(const MachineInstr *MI, int opNum, +void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, const char *Modifier) { - const MachineOperand &MO = MI->getOperand(opNum); + const MachineOperand &MO = MI->getOperand(OpNum); switch (MO.getType()) { case MachineOperand::MO_Register: { unsigned Reg = MO.getReg(); @@ -584,6 +589,22 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, O << "]"; } +void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + const MachineOperand &MO3 = MI->getOperand(Op+2); + + // FIXME: No support yet for specifying alignment. + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName << "]"; + + if (ARM_AM::getAM6WBFlag(MO3.getImm())) { + if (MO2.getReg() == 0) + O << "!"; + else + O << ", " << TM.getRegisterInfo()->get(MO2.getReg()).AsmName; + } +} + void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op, const char *Modifier) { if (Modifier && strcmp(Modifier, "label") == 0) { @@ -606,6 +627,8 @@ ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op) { O << "#" << lsb << ", #" << width; } +//===--------------------------------------------------------------------===// + void ARMAsmPrinter::printThumbAddrModeRROperand(const MachineInstr *MI, int Op) { const MachineOperand &MO1 = MI->getOperand(Op); @@ -659,6 +682,8 @@ void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) { O << "]"; } +//===--------------------------------------------------------------------===// + /// printT2SOImmOperand - T2SOImm is: /// 1. a 4-bit splat control value and 8 bit immediate value /// 2. a 5-bit rotate amount and a non-zero 8-bit immediate value @@ -694,47 +719,110 @@ void ARMAsmPrinter::printT2SOOperand(const MachineInstr *MI, int OpNum) { O << "#" << ARM_AM::getSORegOffset(MO2.getImm()); } +void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI, + int OpNum) { + const MachineOperand &MO1 = MI->getOperand(OpNum); + const MachineOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + unsigned OffImm = MO2.getImm(); + if (OffImm) // Don't print +0. + O << ", #+" << OffImm; + O << "]"; +} -void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int opNum) { - ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(opNum).getImm(); +void ARMAsmPrinter::printT2AddrModeImm8Operand(const MachineInstr *MI, + int OpNum) { + const MachineOperand &MO1 = MI->getOperand(OpNum); + const MachineOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + int32_t OffImm = (int32_t)MO2.getImm(); + // Don't print +0. + if (OffImm < 0) + O << ", #-" << -OffImm; + else if (OffImm > 0) + O << ", #+" << OffImm; + O << "]"; +} + +void ARMAsmPrinter::printT2AddrModeImm8OffsetOperand(const MachineInstr *MI, + int OpNum) { + const MachineOperand &MO1 = MI->getOperand(OpNum); + int32_t OffImm = (int32_t)MO1.getImm(); + // Don't print +0. + if (OffImm < 0) + O << "#-" << -OffImm; + else if (OffImm > 0) + O << "#+" << OffImm; +} + +void ARMAsmPrinter::printT2AddrModeSoRegOperand(const MachineInstr *MI, + int OpNum) { + const MachineOperand &MO1 = MI->getOperand(OpNum); + const MachineOperand &MO2 = MI->getOperand(OpNum+1); + const MachineOperand &MO3 = MI->getOperand(OpNum+2); + + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + if (MO2.getReg()) { + O << ", +" + << TM.getRegisterInfo()->get(MO2.getReg()).AsmName; + + unsigned ShAmt = MO3.getImm(); + if (ShAmt) { + assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!"); + O << ", lsl #" << ShAmt; + } + } + O << "]"; +} + + +//===--------------------------------------------------------------------===// + +void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int OpNum) { + ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); if (CC != ARMCC::AL) O << ARMCondCodeToString(CC); } -void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int opNum){ - unsigned Reg = MI->getOperand(opNum).getReg(); +void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int OpNum){ + unsigned Reg = MI->getOperand(OpNum).getReg(); if (Reg) { assert(Reg == ARM::CPSR && "Expect ARM CPSR register!"); O << 's'; } } -void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int opNum) { - int Id = (int)MI->getOperand(opNum).getImm(); +void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int OpNum) { + int Id = (int)MI->getOperand(OpNum).getImm(); O << TAI->getPrivateGlobalPrefix() << "PC" << Id; } -void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int opNum) { +void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int OpNum) { O << "{"; - for (unsigned i = opNum, e = MI->getNumOperands(); i != e; ++i) { + for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) { printOperand(MI, i); if (i != e-1) O << ", "; } O << "}"; } -void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNo, +void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNum, const char *Modifier) { assert(Modifier && "This operand only works with a modifier!"); // There are two aspects to a CONSTANTPOOL_ENTRY operand, the label and the // data itself. if (!strcmp(Modifier, "label")) { - unsigned ID = MI->getOperand(OpNo).getImm(); + unsigned ID = MI->getOperand(OpNum).getImm(); O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' << ID << ":\n"; } else { assert(!strcmp(Modifier, "cpentry") && "Unknown modifier for CPE"); - unsigned CPI = MI->getOperand(OpNo).getIndex(); + unsigned CPI = MI->getOperand(OpNum).getIndex(); const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; @@ -746,9 +834,9 @@ void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNo, } } -void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNo) { - const MachineOperand &MO1 = MI->getOperand(OpNo); - const MachineOperand &MO2 = MI->getOperand(OpNo+1); // Unique Id +void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum) { + const MachineOperand &MO1 = MI->getOperand(OpNum); + const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id unsigned JTI = MO1.getIndex(); O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm() << ":\n"; @@ -787,7 +875,7 @@ void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNo) { } -bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, +bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode){ // Does this asm operand have a single letter operand modifier? if (ExtraCode && ExtraCode[0]) { @@ -797,10 +885,10 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, default: return true; // Unknown modifier. case 'a': // Don't print "#" before a global var name or constant. case 'c': // Don't print "$" before a global var name or constant. - printOperand(MI, OpNo, "no_hash"); + printOperand(MI, OpNum, "no_hash"); return false; case 'P': // Print a VFP double precision register. - printOperand(MI, OpNo); + printOperand(MI, OpNum); return false; case 'Q': if (TM.getTargetData()->isLittleEndian()) @@ -812,24 +900,24 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, // Fallthrough case 'H': // Write second word of DI / DF reference. // Verify that this operand has two consecutive registers. - if (!MI->getOperand(OpNo).isReg() || - OpNo+1 == MI->getNumOperands() || - !MI->getOperand(OpNo+1).isReg()) + if (!MI->getOperand(OpNum).isReg() || + OpNum+1 == MI->getNumOperands() || + !MI->getOperand(OpNum+1).isReg()) return true; - ++OpNo; // Return the high-part. + ++OpNum; // Return the high-part. } } - printOperand(MI, OpNo); + printOperand(MI, OpNum); return false; } bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNo, unsigned AsmVariant, + unsigned OpNum, unsigned AsmVariant, const char *ExtraCode) { if (ExtraCode && ExtraCode[0]) return true; // Unknown modifier. - printAddrMode2Operand(MI, OpNo); + printAddrMode2Operand(MI, OpNum); return false; } @@ -1138,9 +1226,8 @@ bool ARMAsmPrinter::doFinalization(Module &M) { /// FunctionPass *llvm::createARMCodePrinterPass(raw_ostream &o, ARMBaseTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { - return new ARMAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); + return new ARMAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } namespace { diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index e665ed9..9c46fe0 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -24,5 +24,10 @@ add_llvm_target(ARMCodeGen ARMSubtarget.cpp ARMTargetAsmInfo.cpp ARMTargetMachine.cpp - ThumbInstrInfo.cpp + Thumb1InstrInfo.cpp + Thumb1RegisterInfo.cpp + Thumb2InstrInfo.cpp + Thumb2RegisterInfo.cpp ) + +target_link_libraries (LLVMARMCodeGen LLVMSelectionDAG) diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 4223699..f3377f9 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -530,3 +530,10 @@ those operations and the ARMv6 scalar versions. ARM::MOVCCr is commutable (by flipping the condition). But we need to implement ARMInstrInfo::commuteInstruction() to support it. + +//===---------------------------------------------------------------------===// + +Split out LDR (literal) from normal ARM LDR instruction. Also consider spliting +LDR into imm12 and so_reg forms. This allows us to clean up some code. e.g. +ARMLoadStoreOptimizer does not need to look at LDR (literal) and LDR (so_reg) +while ARMConstantIslandPass only need to worry about LDR (literal). diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp new file mode 100644 index 0000000..e13a811 --- /dev/null +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -0,0 +1,304 @@ +//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Thumb-1 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "ARMInstrInfo.h" +#include "ARM.h" +#include "ARMGenInstrInfo.inc" +#include "ARMMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/ADT/SmallVector.h" +#include "Thumb1InstrInfo.h" + +using namespace llvm; + +Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) + : ARMBaseInstrInfo(STI), RI(*this, STI) { +} + +bool Thumb1InstrInfo::isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned& SrcSubIdx, unsigned& DstSubIdx) const { + SrcSubIdx = DstSubIdx = 0; // No sub-registers. + + unsigned oc = MI.getOpcode(); + switch (oc) { + default: + return false; + case ARM::tMOVr: + case ARM::tMOVhir2lor: + case ARM::tMOVlor2hir: + case ARM::tMOVhir2hir: + assert(MI.getDesc().getNumOperands() >= 2 && + MI.getOperand(0).isReg() && + MI.getOperand(1).isReg() && + "Invalid Thumb MOV instruction"); + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + return true; + } +} + +unsigned Thumb1InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case ARM::tRestore: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +unsigned Thumb1InstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case ARM::tSpill: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + + if (DestRC == ARM::GPRRegisterClass) { + if (SrcRC == ARM::GPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVhir2hir), DestReg).addReg(SrcReg); + return true; + } else if (SrcRC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVlor2hir), DestReg).addReg(SrcReg); + return true; + } + } else if (DestRC == ARM::tGPRRegisterClass) { + if (SrcRC == ARM::GPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVhir2lor), DestReg).addReg(SrcReg); + return true; + } else if (SrcRC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg); + return true; + } + } + + return false; +} + +bool Thumb1InstrInfo:: +canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const { + if (Ops.size() != 1) return false; + + unsigned OpNum = Ops[0]; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: break; + case ARM::tMOVr: + case ARM::tMOVlor2hir: + case ARM::tMOVhir2lor: + case ARM::tMOVhir2hir: { + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg)) + // tSpill cannot take a high register operand. + return false; + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg)) + // tRestore cannot target a high register operand. + return false; + } + return true; + } + } + + return false; +} + +void Thumb1InstrInfo:: +storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + + assert(RC == ARM::tGPRRegisterClass && "Unknown regclass!"); + + if (RC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tSpill)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0); + } +} + +void Thumb1InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, + bool isKill, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const{ + DebugLoc DL = DebugLoc::getUnknownLoc(); + unsigned Opc = 0; + + assert(RC == ARM::GPRRegisterClass && "Unknown regclass!"); + if (RC == ARM::GPRRegisterClass) { + Opc = Addr[0].isFI() ? ARM::tSpill : ARM::tSTR; + } + + MachineInstrBuilder MIB = + BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + NewMIs.push_back(MIB); + return; +} + +void Thumb1InstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + + assert(RC == ARM::tGPRRegisterClass && "Unknown regclass!"); + + if (RC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg) + .addFrameIndex(FI).addImm(0); + } +} + +void Thumb1InstrInfo:: +loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + unsigned Opc = 0; + + if (RC == ARM::GPRRegisterClass) { + Opc = Addr[0].isFI() ? ARM::tRestore : ARM::tLDR; + } + + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + NewMIs.push_back(MIB); + return; +} + +bool Thumb1InstrInfo:: +spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const { + if (CSI.empty()) + return false; + + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH)); + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + // Add the callee-saved register as live-in. It's killed at the spill. + MBB.addLiveIn(Reg); + MIB.addReg(Reg, RegState::Kill); + } + return true; +} + +bool Thumb1InstrInfo:: +restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + if (CSI.empty()) + return false; + + bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; + MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc()); + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + if (Reg == ARM::LR) { + // Special epilogue for vararg functions. See emitEpilogue + if (isVarArg) + continue; + Reg = ARM::PC; + PopMI->setDesc(get(ARM::tPOP_RET)); + MI = MBB.erase(MI); + } + PopMI->addOperand(MachineOperand::CreateReg(Reg, true)); + } + + // It's illegal to emit pop instruction without operands. + if (PopMI->getNumOperands() > 0) + MBB.insert(MI, PopMI); + + return true; +} + +MachineInstr *Thumb1InstrInfo:: +foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops, int FI) const { + if (Ops.size() != 1) return NULL; + + unsigned OpNum = Ops[0]; + unsigned Opc = MI->getOpcode(); + MachineInstr *NewMI = NULL; + switch (Opc) { + default: break; + case ARM::tMOVr: + case ARM::tMOVlor2hir: + case ARM::tMOVhir2lor: + case ARM::tMOVhir2hir: { + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg)) + // tSpill cannot take a high register operand. + break; + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg)) + // tRestore cannot target a high register operand. + break; + bool isDead = MI->getOperand(0).isDead(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore)) + .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addFrameIndex(FI).addImm(0); + } + break; + } + } + + return NewMI; +} diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h new file mode 100644 index 0000000..1bfa1d0 --- /dev/null +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -0,0 +1,93 @@ +//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Thumb-1 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef THUMB1INSTRUCTIONINFO_H +#define THUMB1INSTRUCTIONINFO_H + +#include "llvm/Target/TargetInstrInfo.h" +#include "ARM.h" +#include "ARMInstrInfo.h" +#include "Thumb1RegisterInfo.h" + +namespace llvm { + class ARMSubtarget; + +class Thumb1InstrInfo : public ARMBaseInstrInfo { + Thumb1RegisterInfo RI; +public: + explicit Thumb1InstrInfo(const ARMSubtarget &STI); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + const Thumb1RegisterInfo &getRegisterInfo() const { return RI; } + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const; + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const; + + bool isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + bool copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const; + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC) const; + + void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; + + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC) const; + + void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; + + bool canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const; + + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const; + + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const { + return 0; + } +}; +} + +#endif // THUMB1INSTRUCTIONINFO_H diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp new file mode 100644 index 0000000..92f01d1 --- /dev/null +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -0,0 +1,755 @@ +//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Thumb-1 implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" +#include "Thumb1InstrInfo.h" +#include "Thumb1RegisterInfo.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +static cl::opt<bool> +ThumbRegScavenging("enable-thumb-reg-scavenging", + cl::Hidden, + cl::desc("Enable register scavenging on Thumb")); + +Thumb1RegisterInfo::Thumb1RegisterInfo(const TargetInstrInfo &tii, + const ARMSubtarget &sti) + : ARMBaseRegisterInfo(tii, sti) { +} + +/// emitLoadConstPool - Emits a load from constpool to materialize the +/// specified immediate. +void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, int Val, + const TargetInstrInfo *TII, + DebugLoc dl) const { + MachineFunction &MF = *MBB.getParent(); + MachineConstantPool *ConstantPool = MF.getConstantPool(); + Constant *C = ConstantInt::get(Type::Int32Ty, Val); + unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); + + BuildMI(MBB, MBBI, dl, TII->get(ARM::tLDRcp), DestReg) + .addConstantPoolIndex(Idx); +} + +const TargetRegisterClass* +Thumb1RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const { + if (isARMLowRegister(Reg)) + return ARM::tGPRRegisterClass; + switch (Reg) { + default: + break; + case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: + case ARM::R12: case ARM::SP: case ARM::LR: case ARM::PC: + return ARM::GPRRegisterClass; + } + + return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT); +} + +bool +Thumb1RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { + return ThumbRegScavenging; +} + +bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { + const MachineFrameInfo *FFI = MF.getFrameInfo(); + unsigned CFSize = FFI->getMaxCallFrameSize(); + // It's not always a good idea to include the call frame as part of the + // stack frame. ARM (especially Thumb) has small immediate offset to + // address the stack frame. So a large call frame can cause poor codegen + // and may even makes it impossible to scavenge a register. + if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 + return false; + + return !MF.getFrameInfo()->hasVarSizedObjects(); +} + +/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize +/// a destreg = basereg + immediate in Thumb code. Materialize the immediate +/// in a register using mov / mvn sequences or load the immediate from a +/// constpool entry. +static +void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, unsigned BaseReg, + int NumBytes, bool CanChangeCC, + const TargetInstrInfo &TII, + const Thumb1RegisterInfo& MRI, + DebugLoc dl) { + bool isHigh = !isARMLowRegister(DestReg) || + (BaseReg != 0 && !isARMLowRegister(BaseReg)); + bool isSub = false; + // Subtract doesn't have high register version. Load the negative value + // if either base or dest register is a high register. Also, if do not + // issue sub as part of the sequence if condition register is to be + // preserved. + if (NumBytes < 0 && !isHigh && CanChangeCC) { + isSub = true; + NumBytes = -NumBytes; + } + unsigned LdReg = DestReg; + if (DestReg == ARM::SP) { + assert(BaseReg == ARM::SP && "Unexpected!"); + LdReg = ARM::R3; + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) + .addReg(ARM::R3, RegState::Kill); + } + + if (NumBytes <= 255 && NumBytes >= 0) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); + else if (NumBytes < 0 && NumBytes >= -255) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), LdReg) + .addReg(LdReg, RegState::Kill); + } else + MRI.emitLoadConstPool(MBB, MBBI, LdReg, NumBytes, &TII, dl); + + // Emit add / sub. + int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr); + const MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, + TII.get(Opc), DestReg); + if (DestReg == ARM::SP || isSub) + MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill); + else + MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); + if (DestReg == ARM::SP) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVhir2lor), ARM::R3) + .addReg(ARM::R12, RegState::Kill); +} + +/// calcNumMI - Returns the number of instructions required to materialize +/// the specific add / sub r, c instruction. +static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes, + unsigned NumBits, unsigned Scale) { + unsigned NumMIs = 0; + unsigned Chunk = ((1 << NumBits) - 1) * Scale; + + if (Opc == ARM::tADDrSPi) { + unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; + Bytes -= ThisVal; + NumMIs++; + NumBits = 8; + Scale = 1; // Followed by a number of tADDi8. + Chunk = ((1 << NumBits) - 1) * Scale; + } + + NumMIs += Bytes / Chunk; + if ((Bytes % Chunk) != 0) + NumMIs++; + if (ExtraOpc) + NumMIs++; + return NumMIs; +} + +/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize +/// a destreg = basereg + immediate in Thumb code. +static +void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, unsigned BaseReg, + int NumBytes, const TargetInstrInfo &TII, + const Thumb1RegisterInfo& MRI, + DebugLoc dl) { + bool isSub = NumBytes < 0; + unsigned Bytes = (unsigned)NumBytes; + if (isSub) Bytes = -NumBytes; + bool isMul4 = (Bytes & 3) == 0; + bool isTwoAddr = false; + bool DstNotEqBase = false; + unsigned NumBits = 1; + unsigned Scale = 1; + int Opc = 0; + int ExtraOpc = 0; + + if (DestReg == BaseReg && BaseReg == ARM::SP) { + assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); + NumBits = 7; + Scale = 4; + Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; + isTwoAddr = true; + } else if (!isSub && BaseReg == ARM::SP) { + // r1 = add sp, 403 + // => + // r1 = add sp, 100 * 4 + // r1 = add r1, 3 + if (!isMul4) { + Bytes &= ~3; + ExtraOpc = ARM::tADDi3; + } + NumBits = 8; + Scale = 4; + Opc = ARM::tADDrSPi; + } else { + // sp = sub sp, c + // r1 = sub sp, c + // r8 = sub sp, c + if (DestReg != BaseReg) + DstNotEqBase = true; + NumBits = 8; + Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; + isTwoAddr = true; + } + + unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale); + unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2; + if (NumMIs > Threshold) { + // This will expand into too many instructions. Load the immediate from a + // constpool entry. + emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII, + MRI, dl); + return; + } + + if (DstNotEqBase) { + if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) { + // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7) + unsigned Chunk = (1 << 3) - 1; + unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; + Bytes -= ThisVal; + BuildMI(MBB, MBBI, dl,TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg) + .addReg(BaseReg, RegState::Kill).addImm(ThisVal); + } else { + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) + .addReg(BaseReg, RegState::Kill); + } + BaseReg = DestReg; + } + + unsigned Chunk = ((1 << NumBits) - 1) * Scale; + while (Bytes) { + unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; + Bytes -= ThisVal; + ThisVal /= Scale; + // Build the new tADD / tSUB. + if (isTwoAddr) + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(DestReg).addImm(ThisVal); + else { + bool isKill = BaseReg != ARM::SP; + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); + BaseReg = DestReg; + + if (Opc == ARM::tADDrSPi) { + // r4 = add sp, imm + // r4 = add r4, imm + // ... + NumBits = 8; + Scale = 1; + Chunk = ((1 << NumBits) - 1) * Scale; + Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; + isTwoAddr = true; + } + } + } + + if (ExtraOpc) + BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg) + .addReg(DestReg, RegState::Kill) + .addImm(((unsigned)NumBytes) & 3); +} + +static void emitSPUpdate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + const TargetInstrInfo &TII, DebugLoc dl, + const Thumb1RegisterInfo &MRI, + int NumBytes) { + emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII, + MRI, dl); +} + +void Thumb1RegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + if (!hasReservedCallFrame(MF)) { + // If we have alloca, convert as follows: + // ADJCALLSTACKDOWN -> sub, sp, sp, amount + // ADJCALLSTACKUP -> add, sp, sp, amount + MachineInstr *Old = I; + DebugLoc dl = Old->getDebugLoc(); + unsigned Amount = Old->getOperand(0).getImm(); + if (Amount != 0) { + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); + Amount = (Amount+Align-1)/Align*Align; + + // Replace the pseudo instruction with a new instruction... + unsigned Opc = Old->getOpcode(); + if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { + emitSPUpdate(MBB, I, TII, dl, *this, -Amount); + } else { + assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); + emitSPUpdate(MBB, I, TII, dl, *this, Amount); + } + } + } + MBB.erase(I); +} + +/// emitThumbConstant - Emit a series of instructions to materialize a +/// constant. +static void emitThumbConstant(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, int Imm, + const TargetInstrInfo &TII, + const Thumb1RegisterInfo& MRI, + DebugLoc dl) { + bool isSub = Imm < 0; + if (isSub) Imm = -Imm; + + int Chunk = (1 << 8) - 1; + int ThisVal = (Imm > Chunk) ? Chunk : Imm; + Imm -= ThisVal; + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal); + if (Imm > 0) + emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl); + if (isSub) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), DestReg) + .addReg(DestReg, RegState::Kill); +} + +void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS) const{ + unsigned i = 0; + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + DebugLoc dl = MI.getDebugLoc(); + + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + unsigned FrameReg = ARM::SP; + int FrameIndex = MI.getOperand(i).getIndex(); + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + + MF.getFrameInfo()->getStackSize() + SPAdj; + + if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex)) + Offset -= AFI->getGPRCalleeSavedArea1Offset(); + else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) + Offset -= AFI->getGPRCalleeSavedArea2Offset(); + else if (hasFP(MF)) { + assert(SPAdj == 0 && "Unexpected"); + // There is alloca()'s in this function, must reference off the frame + // pointer instead. + FrameReg = getFrameRegister(MF); + Offset -= AFI->getFramePtrSpillOffset(); + } + + unsigned Opcode = MI.getOpcode(); + const TargetInstrDesc &Desc = MI.getDesc(); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + + if (Opcode == ARM::tADDrSPi) { + Offset += MI.getOperand(i+1).getImm(); + + // Can't use tADDrSPi if it's based off the frame pointer. + unsigned NumBits = 0; + unsigned Scale = 1; + if (FrameReg != ARM::SP) { + Opcode = ARM::tADDi3; + MI.setDesc(TII.get(ARM::tADDi3)); + NumBits = 3; + } else { + NumBits = 8; + Scale = 4; + assert((Offset & 3) == 0 && + "Thumb add/sub sp, #imm immediate must be multiple of 4!"); + } + + if (Offset == 0) { + // Turn it into a move. + MI.setDesc(TII.get(ARM::tMOVhir2lor)); + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.RemoveOperand(i+1); + return; + } + + // Common case: small offset, fits into instruction. + unsigned Mask = (1 << NumBits) - 1; + if (((Offset / Scale) & ~Mask) == 0) { + // Replace the FrameIndex with sp / fp + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(Offset / Scale); + return; + } + + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned Bytes = (Offset > 0) ? Offset : -Offset; + unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale); + // MI would expand into a large number of instructions. Don't try to + // simplify the immediate. + if (NumMIs > 2) { + emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII, + *this, dl); + MBB.erase(II); + return; + } + + if (Offset > 0) { + // Translate r0 = add sp, imm to + // r0 = add sp, 255*4 + // r0 = add r0, (imm - 255*4) + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(Mask); + Offset = (Offset - Mask * Scale); + MachineBasicBlock::iterator NII = next(II); + emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII, + *this, dl); + } else { + // Translate r0 = add sp, -imm to + // r0 = -imm (this is then translated into a series of instructons) + // r0 = add r0, sp + emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl); + MI.setDesc(TII.get(ARM::tADDhirr)); + MI.getOperand(i).ChangeToRegister(DestReg, false, false, true); + MI.getOperand(i+1).ChangeToRegister(FrameReg, false); + } + return; + } else { + unsigned ImmIdx = 0; + int InstrOffs = 0; + unsigned NumBits = 0; + unsigned Scale = 1; + switch (AddrMode) { + case ARMII::AddrModeT1_s: { + ImmIdx = i+1; + InstrOffs = MI.getOperand(ImmIdx).getImm(); + NumBits = (FrameReg == ARM::SP) ? 8 : 5; + Scale = 4; + break; + } + default: + assert(0 && "Unsupported addressing mode!"); + abort(); + break; + } + + Offset += InstrOffs * Scale; + assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!"); + + // Common case: small offset, fits into instruction. + MachineOperand &ImmOp = MI.getOperand(ImmIdx); + int ImmedOffset = Offset / Scale; + unsigned Mask = (1 << NumBits) - 1; + if ((unsigned)Offset <= Mask * Scale) { + // Replace the FrameIndex with sp + MI.getOperand(i).ChangeToRegister(FrameReg, false); + ImmOp.ChangeToImmediate(ImmedOffset); + return; + } + + bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill; + if (AddrMode == ARMII::AddrModeT1_s) { + // Thumb tLDRspi, tSTRspi. These will change to instructions that use + // a different base register. + NumBits = 5; + Mask = (1 << NumBits) - 1; + } + // If this is a thumb spill / restore, we will be using a constpool load to + // materialize the offset. + if (AddrMode == ARMII::AddrModeT1_s && isThumSpillRestore) + ImmOp.ChangeToImmediate(0); + else { + // Otherwise, it didn't fit. Pull in what we can to simplify the immed. + ImmedOffset = ImmedOffset & Mask; + ImmOp.ChangeToImmediate(ImmedOffset); + Offset &= ~(Mask*Scale); + } + } + + // If we get here, the immediate doesn't fit into the instruction. We folded + // as much as possible above, handle the rest, providing a register that is + // SP+LargeImm. + assert(Offset && "This code isn't needed if offset already handled!"); + + if (Desc.mayLoad()) { + // Use the destination register to materialize sp + offset. + unsigned TmpReg = MI.getOperand(0).getReg(); + bool UseRR = false; + if (Opcode == ARM::tRestore) { + if (FrameReg == ARM::SP) + emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, + Offset, false, TII, *this, dl); + else { + emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl); + UseRR = true; + } + } else + emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, + *this, dl); + MI.setDesc(TII.get(ARM::tLDR)); + MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); + if (UseRR) + // Use [reg, reg] addrmode. + MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); + else // tLDR has an extra register operand. + MI.addOperand(MachineOperand::CreateReg(0, false)); + } else if (Desc.mayStore()) { + // FIXME! This is horrific!!! We need register scavenging. + // Our temporary workaround has marked r3 unavailable. Of course, r3 is + // also a ABI register so it's possible that is is the register that is + // being storing here. If that's the case, we do the following: + // r12 = r2 + // Use r2 to materialize sp + offset + // str r3, r2 + // r2 = r12 + unsigned ValReg = MI.getOperand(0).getReg(); + unsigned TmpReg = ARM::R3; + bool UseRR = false; + if (ValReg == ARM::R3) { + BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) + .addReg(ARM::R2, RegState::Kill); + TmpReg = ARM::R2; + } + if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) + BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) + .addReg(ARM::R3, RegState::Kill); + if (Opcode == ARM::tSpill) { + if (FrameReg == ARM::SP) + emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, + Offset, false, TII, *this, dl); + else { + emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl); + UseRR = true; + } + } else + emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, + *this, dl); + MI.setDesc(TII.get(ARM::tSTR)); + MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); + if (UseRR) // Use [reg, reg] addrmode. + MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); + else // tSTR has an extra register operand. + MI.addOperand(MachineOperand::CreateReg(0, false)); + + MachineBasicBlock::iterator NII = next(II); + if (ValReg == ARM::R3) + BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R2) + .addReg(ARM::R12, RegState::Kill); + if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) + BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R3) + .addReg(ARM::R12, RegState::Kill); + } else + assert(false && "Unexpected opcode!"); +} + +void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + unsigned NumBytes = MFI->getStackSize(); + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + DebugLoc dl = (MBBI != MBB.end() ? + MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); + + // Check if R3 is live in. It might have to be used as a scratch register. + for (MachineRegisterInfo::livein_iterator I =MF.getRegInfo().livein_begin(), + E = MF.getRegInfo().livein_end(); I != E; ++I) { + if (I->first == ARM::R3) { + AFI->setR3IsLiveIn(true); + break; + } + } + + // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. + NumBytes = (NumBytes + 3) & ~3; + MFI->setStackSize(NumBytes); + + // Determine the sizes of each callee-save spill areas and record which frame + // belongs to which callee-save spill areas. + unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; + int FramePtrSpillFI = 0; + + if (VARegSaveSize) + emitSPUpdate(MBB, MBBI, TII, dl, *this, -VARegSaveSize); + + if (!AFI->hasStackFrame()) { + if (NumBytes != 0) + emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes); + return; + } + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + int FI = CSI[i].getFrameIdx(); + switch (Reg) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + AFI->addGPRCalleeSavedArea1Frame(FI); + GPRCS1Size += 4; + break; + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + if (STI.isTargetDarwin()) { + AFI->addGPRCalleeSavedArea2Frame(FI); + GPRCS2Size += 4; + } else { + AFI->addGPRCalleeSavedArea1Frame(FI); + GPRCS1Size += 4; + } + break; + default: + AFI->addDPRCalleeSavedAreaFrame(FI); + DPRCSSize += 8; + } + } + + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { + ++MBBI; + if (MBBI != MBB.end()) + dl = MBBI->getDebugLoc(); + } + + // Darwin ABI requires FP to point to the stack slot that contains the + // previous FP. + if (STI.isTargetDarwin() || hasFP(MF)) { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) + .addFrameIndex(FramePtrSpillFI).addImm(0); + } + + // Determine starting offsets of spill areas. + unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); + unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; + unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; + AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); + AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); + AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); + AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); + + NumBytes = DPRCSOffset; + if (NumBytes) { + // Insert it after all the callee-save spills. + emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes); + } + + if (STI.isTargetELF() && hasFP(MF)) { + MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - + AFI->getFramePtrSpillOffset()); + } + + AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); + AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); + AFI->setDPRCalleeSavedAreaSize(DPRCSSize); +} + +static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { + for (unsigned i = 0; CSRegs[i]; ++i) + if (Reg == CSRegs[i]) + return true; + return false; +} + +static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { + return (MI->getOpcode() == ARM::tRestore && + MI->getOperand(1).isFI() && + isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)); +} + +void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = prior(MBB.end()); + assert((MBBI->getOpcode() == ARM::tBX_RET || + MBBI->getOpcode() == ARM::tPOP_RET) && + "Can only insert epilog into returning blocks"); + DebugLoc dl = MBBI->getDebugLoc(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + int NumBytes = (int)MFI->getStackSize(); + + if (!AFI->hasStackFrame()) { + if (NumBytes != 0) + emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); + } else { + // Unwind MBBI to point to first LDR / FLDD. + const unsigned *CSRegs = getCalleeSavedRegs(); + if (MBBI != MBB.begin()) { + do + --MBBI; + while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); + if (!isCSRestore(MBBI, CSRegs)) + ++MBBI; + } + + // Move SP to start of FP callee save spill area. + NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + + AFI->getGPRCalleeSavedArea2Size() + + AFI->getDPRCalleeSavedAreaSize()); + + if (hasFP(MF)) { + NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; + // Reset SP based on frame pointer only if the stack frame extends beyond + // frame pointer stack slot or target is ELF and the function has FP. + if (NumBytes) + emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes, + TII, *this, dl); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::SP) + .addReg(FramePtr); + } else { + if (MBBI->getOpcode() == ARM::tBX_RET && + &MBB.front() != MBBI && + prior(MBBI)->getOpcode() == ARM::tPOP) { + MachineBasicBlock::iterator PMBBI = prior(MBBI); + emitSPUpdate(MBB, PMBBI, TII, dl, *this, NumBytes); + } else + emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); + } + } + + if (VARegSaveSize) { + // Epilogue for vararg functions: pop LR to R3 and branch off it. + // FIXME: Verify this is still ok when R3 is no longer being reserved. + BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)).addReg(ARM::R3); + + emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize); + + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3); + MBB.erase(MBBI); + } +} diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h new file mode 100644 index 0000000..6d4f1f0 --- /dev/null +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -0,0 +1,60 @@ +//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Thumb-1 implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef THUMB1REGISTERINFO_H +#define THUMB1REGISTERINFO_H + +#include "ARM.h" +#include "ARMRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +namespace llvm { + class ARMSubtarget; + class TargetInstrInfo; + class Type; + +struct Thumb1RegisterInfo : public ARMBaseRegisterInfo { +public: + Thumb1RegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); + + /// emitLoadConstPool - Emits a load from constpool to materialize the + /// specified immediate. + void emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, int Val, + const TargetInstrInfo *TII, + DebugLoc dl) const; + + /// Code Generation virtual methods... + const TargetRegisterClass * + getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const; + + bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; + + bool requiresRegisterScavenging(const MachineFunction &MF) const; + + bool hasReservedCallFrame(MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; + + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; +}; +} + +#endif // THUMB1REGISTERINFO_H diff --git a/lib/Target/ARM/ThumbInstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 075d940..35d09fd 100644 --- a/lib/Target/ARM/ThumbInstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -1,4 +1,4 @@ -//===- ThumbInstrInfo.cpp - Thumb Instruction Information --------*- C++ -*-===// +//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains the Thumb implementation of the TargetInstrInfo class. +// This file contains the Thumb-2 implementation of the TargetInstrInfo class. // //===----------------------------------------------------------------------===// @@ -18,17 +18,17 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/ADT/SmallVector.h" -#include "ThumbInstrInfo.h" +#include "Thumb2InstrInfo.h" using namespace llvm; -ThumbInstrInfo::ThumbInstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI) { +Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) + : ARMBaseInstrInfo(STI), RI(*this, STI) { } -bool ThumbInstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned& SrcSubIdx, unsigned& DstSubIdx) const { +bool Thumb2InstrInfo::isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned& SrcSubIdx, unsigned& DstSubIdx) const { SrcSubIdx = DstSubIdx = 0; // No sub-registers. unsigned oc = MI.getOpcode(); @@ -50,8 +50,8 @@ bool ThumbInstrInfo::isMoveInstr(const MachineInstr &MI, } } -unsigned ThumbInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { +unsigned Thumb2InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { switch (MI->getOpcode()) { default: break; // FIXME: Thumb2 @@ -67,8 +67,8 @@ unsigned ThumbInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, return 0; } -unsigned ThumbInstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { +unsigned Thumb2InstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { switch (MI->getOpcode()) { default: break; // FIXME: Thumb2 @@ -84,11 +84,11 @@ unsigned ThumbInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return 0; } -bool ThumbInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { +bool Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); @@ -114,7 +114,38 @@ bool ThumbInstrInfo::copyRegToReg(MachineBasicBlock &MBB, return false; } -void ThumbInstrInfo:: +bool Thumb2InstrInfo:: +canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const { + if (Ops.size() != 1) return false; + + unsigned OpNum = Ops[0]; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: break; + case ARM::tMOVr: + case ARM::tMOVlor2hir: + case ARM::tMOVhir2lor: + case ARM::tMOVhir2hir: { + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg)) + // tSpill cannot take a high register operand. + return false; + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg)) + // tRestore cannot target a high register operand. + return false; + } + return true; + } + } + + return false; +} + +void Thumb2InstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC) const { @@ -131,11 +162,11 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } } -void ThumbInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, - bool isKill, - SmallVectorImpl<MachineOperand> &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const{ +void Thumb2InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, + bool isKill, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const{ DebugLoc DL = DebugLoc::getUnknownLoc(); unsigned Opc = 0; @@ -153,7 +184,7 @@ void ThumbInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, return; } -void ThumbInstrInfo:: +void Thumb2InstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC) const { @@ -169,7 +200,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } } -void ThumbInstrInfo:: +void Thumb2InstrInfo:: loadRegFromAddr(MachineFunction &MF, unsigned DestReg, SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC, @@ -189,7 +220,7 @@ loadRegFromAddr(MachineFunction &MF, unsigned DestReg, return; } -bool ThumbInstrInfo:: +bool Thumb2InstrInfo:: spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI) const { @@ -209,7 +240,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } -bool ThumbInstrInfo:: +bool Thumb2InstrInfo:: restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI) const { @@ -240,11 +271,10 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } -MachineInstr *ThumbInstrInfo:: +MachineInstr *Thumb2InstrInfo:: foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops, int FI) const { if (Ops.size() != 1) return NULL; - const ARMRegisterInfo &RI = getRegisterInfo(); unsigned OpNum = Ops[0]; unsigned Opc = MI->getOpcode(); @@ -258,7 +288,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, if (OpNum == 0) { // move -> store unsigned SrcReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); - if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg)) + if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg)) // tSpill cannot take a high register operand. break; NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill)) @@ -266,7 +296,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, .addFrameIndex(FI).addImm(0); } else { // move -> load unsigned DstReg = MI->getOperand(0).getReg(); - if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg)) + if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg)) // tRestore cannot target a high register operand. break; bool isDead = MI->getOperand(0).isDead(); diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h new file mode 100644 index 0000000..84dcb49 --- /dev/null +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -0,0 +1,93 @@ +//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Thumb-2 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef THUMB2INSTRUCTIONINFO_H +#define THUMB2INSTRUCTIONINFO_H + +#include "llvm/Target/TargetInstrInfo.h" +#include "ARM.h" +#include "ARMInstrInfo.h" +#include "Thumb2RegisterInfo.h" + +namespace llvm { + class ARMSubtarget; + +class Thumb2InstrInfo : public ARMBaseInstrInfo { + Thumb2RegisterInfo RI; +public: + explicit Thumb2InstrInfo(const ARMSubtarget &STI); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + const Thumb2RegisterInfo &getRegisterInfo() const { return RI; } + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const; + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const; + + bool isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + bool copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const; + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC) const; + + void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; + + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC) const; + + void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; + + bool canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const; + + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const; + + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const { + return 0; + } +}; +} + +#endif // THUMB2INSTRUCTIONINFO_H diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp new file mode 100644 index 0000000..0f0c0e4 --- /dev/null +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -0,0 +1,755 @@ +//===- Thumb2RegisterInfo.cpp - Thumb-2 Register Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Thumb-2 implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" +#include "Thumb2InstrInfo.h" +#include "Thumb2RegisterInfo.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +static cl::opt<bool> +Thumb2RegScavenging("enable-thumb2-reg-scavenging", + cl::Hidden, + cl::desc("Enable register scavenging on Thumb-2")); + +Thumb2RegisterInfo::Thumb2RegisterInfo(const TargetInstrInfo &tii, + const ARMSubtarget &sti) + : ARMBaseRegisterInfo(tii, sti) { +} + +/// emitLoadConstPool - Emits a load from constpool to materialize the +/// specified immediate. +void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, int Val, + const TargetInstrInfo *TII, + DebugLoc dl) const { + MachineFunction &MF = *MBB.getParent(); + MachineConstantPool *ConstantPool = MF.getConstantPool(); + Constant *C = ConstantInt::get(Type::Int32Ty, Val); + unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); + + BuildMI(MBB, MBBI, dl, TII->get(ARM::tLDRcp), DestReg) + .addConstantPoolIndex(Idx); +} + +const TargetRegisterClass* +Thumb2RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const { + if (isARMLowRegister(Reg)) + return ARM::tGPRRegisterClass; + switch (Reg) { + default: + break; + case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: + case ARM::R12: case ARM::SP: case ARM::LR: case ARM::PC: + return ARM::GPRRegisterClass; + } + + return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT); +} + +bool +Thumb2RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { + return Thumb2RegScavenging; +} + +bool Thumb2RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { + const MachineFrameInfo *FFI = MF.getFrameInfo(); + unsigned CFSize = FFI->getMaxCallFrameSize(); + // It's not always a good idea to include the call frame as part of the + // stack frame. ARM (especially Thumb) has small immediate offset to + // address the stack frame. So a large call frame can cause poor codegen + // and may even makes it impossible to scavenge a register. + if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 + return false; + + return !MF.getFrameInfo()->hasVarSizedObjects(); +} + +/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize +/// a destreg = basereg + immediate in Thumb code. Materialize the immediate +/// in a register using mov / mvn sequences or load the immediate from a +/// constpool entry. +static +void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, unsigned BaseReg, + int NumBytes, bool CanChangeCC, + const TargetInstrInfo &TII, + const Thumb2RegisterInfo& MRI, + DebugLoc dl) { + bool isHigh = !isARMLowRegister(DestReg) || + (BaseReg != 0 && !isARMLowRegister(BaseReg)); + bool isSub = false; + // Subtract doesn't have high register version. Load the negative value + // if either base or dest register is a high register. Also, if do not + // issue sub as part of the sequence if condition register is to be + // preserved. + if (NumBytes < 0 && !isHigh && CanChangeCC) { + isSub = true; + NumBytes = -NumBytes; + } + unsigned LdReg = DestReg; + if (DestReg == ARM::SP) { + assert(BaseReg == ARM::SP && "Unexpected!"); + LdReg = ARM::R3; + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) + .addReg(ARM::R3, RegState::Kill); + } + + if (NumBytes <= 255 && NumBytes >= 0) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); + else if (NumBytes < 0 && NumBytes >= -255) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), LdReg) + .addReg(LdReg, RegState::Kill); + } else + MRI.emitLoadConstPool(MBB, MBBI, LdReg, NumBytes, &TII, dl); + + // Emit add / sub. + int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr); + const MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, + TII.get(Opc), DestReg); + if (DestReg == ARM::SP || isSub) + MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill); + else + MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); + if (DestReg == ARM::SP) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVhir2lor), ARM::R3) + .addReg(ARM::R12, RegState::Kill); +} + +/// calcNumMI - Returns the number of instructions required to materialize +/// the specific add / sub r, c instruction. +static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes, + unsigned NumBits, unsigned Scale) { + unsigned NumMIs = 0; + unsigned Chunk = ((1 << NumBits) - 1) * Scale; + + if (Opc == ARM::tADDrSPi) { + unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; + Bytes -= ThisVal; + NumMIs++; + NumBits = 8; + Scale = 1; // Followed by a number of tADDi8. + Chunk = ((1 << NumBits) - 1) * Scale; + } + + NumMIs += Bytes / Chunk; + if ((Bytes % Chunk) != 0) + NumMIs++; + if (ExtraOpc) + NumMIs++; + return NumMIs; +} + +/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize +/// a destreg = basereg + immediate in Thumb code. +static +void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, unsigned BaseReg, + int NumBytes, const TargetInstrInfo &TII, + const Thumb2RegisterInfo& MRI, + DebugLoc dl) { + bool isSub = NumBytes < 0; + unsigned Bytes = (unsigned)NumBytes; + if (isSub) Bytes = -NumBytes; + bool isMul4 = (Bytes & 3) == 0; + bool isTwoAddr = false; + bool DstNotEqBase = false; + unsigned NumBits = 1; + unsigned Scale = 1; + int Opc = 0; + int ExtraOpc = 0; + + if (DestReg == BaseReg && BaseReg == ARM::SP) { + assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); + NumBits = 7; + Scale = 4; + Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; + isTwoAddr = true; + } else if (!isSub && BaseReg == ARM::SP) { + // r1 = add sp, 403 + // => + // r1 = add sp, 100 * 4 + // r1 = add r1, 3 + if (!isMul4) { + Bytes &= ~3; + ExtraOpc = ARM::tADDi3; + } + NumBits = 8; + Scale = 4; + Opc = ARM::tADDrSPi; + } else { + // sp = sub sp, c + // r1 = sub sp, c + // r8 = sub sp, c + if (DestReg != BaseReg) + DstNotEqBase = true; + NumBits = 8; + Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; + isTwoAddr = true; + } + + unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale); + unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2; + if (NumMIs > Threshold) { + // This will expand into too many instructions. Load the immediate from a + // constpool entry. + emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII, + MRI, dl); + return; + } + + if (DstNotEqBase) { + if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) { + // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7) + unsigned Chunk = (1 << 3) - 1; + unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; + Bytes -= ThisVal; + BuildMI(MBB, MBBI, dl,TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg) + .addReg(BaseReg, RegState::Kill).addImm(ThisVal); + } else { + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) + .addReg(BaseReg, RegState::Kill); + } + BaseReg = DestReg; + } + + unsigned Chunk = ((1 << NumBits) - 1) * Scale; + while (Bytes) { + unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; + Bytes -= ThisVal; + ThisVal /= Scale; + // Build the new tADD / tSUB. + if (isTwoAddr) + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(DestReg).addImm(ThisVal); + else { + bool isKill = BaseReg != ARM::SP; + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); + BaseReg = DestReg; + + if (Opc == ARM::tADDrSPi) { + // r4 = add sp, imm + // r4 = add r4, imm + // ... + NumBits = 8; + Scale = 1; + Chunk = ((1 << NumBits) - 1) * Scale; + Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; + isTwoAddr = true; + } + } + } + + if (ExtraOpc) + BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg) + .addReg(DestReg, RegState::Kill) + .addImm(((unsigned)NumBytes) & 3); +} + +static void emitSPUpdate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + const TargetInstrInfo &TII, DebugLoc dl, + const Thumb2RegisterInfo &MRI, + int NumBytes) { + emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII, + MRI, dl); +} + +void Thumb2RegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + if (!hasReservedCallFrame(MF)) { + // If we have alloca, convert as follows: + // ADJCALLSTACKDOWN -> sub, sp, sp, amount + // ADJCALLSTACKUP -> add, sp, sp, amount + MachineInstr *Old = I; + DebugLoc dl = Old->getDebugLoc(); + unsigned Amount = Old->getOperand(0).getImm(); + if (Amount != 0) { + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); + Amount = (Amount+Align-1)/Align*Align; + + // Replace the pseudo instruction with a new instruction... + unsigned Opc = Old->getOpcode(); + if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { + emitSPUpdate(MBB, I, TII, dl, *this, -Amount); + } else { + assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); + emitSPUpdate(MBB, I, TII, dl, *this, Amount); + } + } + } + MBB.erase(I); +} + +/// emitThumbConstant - Emit a series of instructions to materialize a +/// constant. +static void emitThumbConstant(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, int Imm, + const TargetInstrInfo &TII, + const Thumb2RegisterInfo& MRI, + DebugLoc dl) { + bool isSub = Imm < 0; + if (isSub) Imm = -Imm; + + int Chunk = (1 << 8) - 1; + int ThisVal = (Imm > Chunk) ? Chunk : Imm; + Imm -= ThisVal; + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal); + if (Imm > 0) + emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl); + if (isSub) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), DestReg) + .addReg(DestReg, RegState::Kill); +} + +void Thumb2RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS) const{ + unsigned i = 0; + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + DebugLoc dl = MI.getDebugLoc(); + + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + unsigned FrameReg = ARM::SP; + int FrameIndex = MI.getOperand(i).getIndex(); + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + + MF.getFrameInfo()->getStackSize() + SPAdj; + + if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex)) + Offset -= AFI->getGPRCalleeSavedArea1Offset(); + else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) + Offset -= AFI->getGPRCalleeSavedArea2Offset(); + else if (hasFP(MF)) { + assert(SPAdj == 0 && "Unexpected"); + // There is alloca()'s in this function, must reference off the frame + // pointer instead. + FrameReg = getFrameRegister(MF); + Offset -= AFI->getFramePtrSpillOffset(); + } + + unsigned Opcode = MI.getOpcode(); + const TargetInstrDesc &Desc = MI.getDesc(); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + + if (Opcode == ARM::tADDrSPi) { + Offset += MI.getOperand(i+1).getImm(); + + // Can't use tADDrSPi if it's based off the frame pointer. + unsigned NumBits = 0; + unsigned Scale = 1; + if (FrameReg != ARM::SP) { + Opcode = ARM::tADDi3; + MI.setDesc(TII.get(ARM::tADDi3)); + NumBits = 3; + } else { + NumBits = 8; + Scale = 4; + assert((Offset & 3) == 0 && + "Thumb add/sub sp, #imm immediate must be multiple of 4!"); + } + + if (Offset == 0) { + // Turn it into a move. + MI.setDesc(TII.get(ARM::tMOVhir2lor)); + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.RemoveOperand(i+1); + return; + } + + // Common case: small offset, fits into instruction. + unsigned Mask = (1 << NumBits) - 1; + if (((Offset / Scale) & ~Mask) == 0) { + // Replace the FrameIndex with sp / fp + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(Offset / Scale); + return; + } + + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned Bytes = (Offset > 0) ? Offset : -Offset; + unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale); + // MI would expand into a large number of instructions. Don't try to + // simplify the immediate. + if (NumMIs > 2) { + emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII, + *this, dl); + MBB.erase(II); + return; + } + + if (Offset > 0) { + // Translate r0 = add sp, imm to + // r0 = add sp, 255*4 + // r0 = add r0, (imm - 255*4) + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(Mask); + Offset = (Offset - Mask * Scale); + MachineBasicBlock::iterator NII = next(II); + emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII, + *this, dl); + } else { + // Translate r0 = add sp, -imm to + // r0 = -imm (this is then translated into a series of instructons) + // r0 = add r0, sp + emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl); + MI.setDesc(TII.get(ARM::tADDhirr)); + MI.getOperand(i).ChangeToRegister(DestReg, false, false, true); + MI.getOperand(i+1).ChangeToRegister(FrameReg, false); + } + return; + } else { + unsigned ImmIdx = 0; + int InstrOffs = 0; + unsigned NumBits = 0; + unsigned Scale = 1; + switch (AddrMode) { + case ARMII::AddrModeT1_s: { + ImmIdx = i+1; + InstrOffs = MI.getOperand(ImmIdx).getImm(); + NumBits = (FrameReg == ARM::SP) ? 8 : 5; + Scale = 4; + break; + } + default: + assert(0 && "Unsupported addressing mode!"); + abort(); + break; + } + + Offset += InstrOffs * Scale; + assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!"); + + // Common case: small offset, fits into instruction. + MachineOperand &ImmOp = MI.getOperand(ImmIdx); + int ImmedOffset = Offset / Scale; + unsigned Mask = (1 << NumBits) - 1; + if ((unsigned)Offset <= Mask * Scale) { + // Replace the FrameIndex with sp + MI.getOperand(i).ChangeToRegister(FrameReg, false); + ImmOp.ChangeToImmediate(ImmedOffset); + return; + } + + bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill; + if (AddrMode == ARMII::AddrModeT1_s) { + // Thumb tLDRspi, tSTRspi. These will change to instructions that use + // a different base register. + NumBits = 5; + Mask = (1 << NumBits) - 1; + } + // If this is a thumb spill / restore, we will be using a constpool load to + // materialize the offset. + if (AddrMode == ARMII::AddrModeT1_s && isThumSpillRestore) + ImmOp.ChangeToImmediate(0); + else { + // Otherwise, it didn't fit. Pull in what we can to simplify the immed. + ImmedOffset = ImmedOffset & Mask; + ImmOp.ChangeToImmediate(ImmedOffset); + Offset &= ~(Mask*Scale); + } + } + + // If we get here, the immediate doesn't fit into the instruction. We folded + // as much as possible above, handle the rest, providing a register that is + // SP+LargeImm. + assert(Offset && "This code isn't needed if offset already handled!"); + + if (Desc.mayLoad()) { + // Use the destination register to materialize sp + offset. + unsigned TmpReg = MI.getOperand(0).getReg(); + bool UseRR = false; + if (Opcode == ARM::tRestore) { + if (FrameReg == ARM::SP) + emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, + Offset, false, TII, *this, dl); + else { + emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl); + UseRR = true; + } + } else + emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, + *this, dl); + MI.setDesc(TII.get(ARM::tLDR)); + MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); + if (UseRR) + // Use [reg, reg] addrmode. + MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); + else // tLDR has an extra register operand. + MI.addOperand(MachineOperand::CreateReg(0, false)); + } else if (Desc.mayStore()) { + // FIXME! This is horrific!!! We need register scavenging. + // Our temporary workaround has marked r3 unavailable. Of course, r3 is + // also a ABI register so it's possible that is is the register that is + // being storing here. If that's the case, we do the following: + // r12 = r2 + // Use r2 to materialize sp + offset + // str r3, r2 + // r2 = r12 + unsigned ValReg = MI.getOperand(0).getReg(); + unsigned TmpReg = ARM::R3; + bool UseRR = false; + if (ValReg == ARM::R3) { + BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) + .addReg(ARM::R2, RegState::Kill); + TmpReg = ARM::R2; + } + if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) + BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) + .addReg(ARM::R3, RegState::Kill); + if (Opcode == ARM::tSpill) { + if (FrameReg == ARM::SP) + emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, + Offset, false, TII, *this, dl); + else { + emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl); + UseRR = true; + } + } else + emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, + *this, dl); + MI.setDesc(TII.get(ARM::tSTR)); + MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); + if (UseRR) // Use [reg, reg] addrmode. + MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); + else // tSTR has an extra register operand. + MI.addOperand(MachineOperand::CreateReg(0, false)); + + MachineBasicBlock::iterator NII = next(II); + if (ValReg == ARM::R3) + BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R2) + .addReg(ARM::R12, RegState::Kill); + if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) + BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R3) + .addReg(ARM::R12, RegState::Kill); + } else + assert(false && "Unexpected opcode!"); +} + +void Thumb2RegisterInfo::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + unsigned NumBytes = MFI->getStackSize(); + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + DebugLoc dl = (MBBI != MBB.end() ? + MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); + + // Check if R3 is live in. It might have to be used as a scratch register. + for (MachineRegisterInfo::livein_iterator I =MF.getRegInfo().livein_begin(), + E = MF.getRegInfo().livein_end(); I != E; ++I) { + if (I->first == ARM::R3) { + AFI->setR3IsLiveIn(true); + break; + } + } + + // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. + NumBytes = (NumBytes + 3) & ~3; + MFI->setStackSize(NumBytes); + + // Determine the sizes of each callee-save spill areas and record which frame + // belongs to which callee-save spill areas. + unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; + int FramePtrSpillFI = 0; + + if (VARegSaveSize) + emitSPUpdate(MBB, MBBI, TII, dl, *this, -VARegSaveSize); + + if (!AFI->hasStackFrame()) { + if (NumBytes != 0) + emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes); + return; + } + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + int FI = CSI[i].getFrameIdx(); + switch (Reg) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + AFI->addGPRCalleeSavedArea1Frame(FI); + GPRCS1Size += 4; + break; + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + if (STI.isTargetDarwin()) { + AFI->addGPRCalleeSavedArea2Frame(FI); + GPRCS2Size += 4; + } else { + AFI->addGPRCalleeSavedArea1Frame(FI); + GPRCS1Size += 4; + } + break; + default: + AFI->addDPRCalleeSavedAreaFrame(FI); + DPRCSSize += 8; + } + } + + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { + ++MBBI; + if (MBBI != MBB.end()) + dl = MBBI->getDebugLoc(); + } + + // Darwin ABI requires FP to point to the stack slot that contains the + // previous FP. + if (STI.isTargetDarwin() || hasFP(MF)) { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) + .addFrameIndex(FramePtrSpillFI).addImm(0); + } + + // Determine starting offsets of spill areas. + unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); + unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; + unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; + AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); + AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); + AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); + AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); + + NumBytes = DPRCSOffset; + if (NumBytes) { + // Insert it after all the callee-save spills. + emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes); + } + + if (STI.isTargetELF() && hasFP(MF)) { + MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - + AFI->getFramePtrSpillOffset()); + } + + AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); + AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); + AFI->setDPRCalleeSavedAreaSize(DPRCSSize); +} + +static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { + for (unsigned i = 0; CSRegs[i]; ++i) + if (Reg == CSRegs[i]) + return true; + return false; +} + +static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { + return (MI->getOpcode() == ARM::tRestore && + MI->getOperand(1).isFI() && + isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)); +} + +void Thumb2RegisterInfo::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = prior(MBB.end()); + assert((MBBI->getOpcode() == ARM::tBX_RET || + MBBI->getOpcode() == ARM::tPOP_RET) && + "Can only insert epilog into returning blocks"); + DebugLoc dl = MBBI->getDebugLoc(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + int NumBytes = (int)MFI->getStackSize(); + + if (!AFI->hasStackFrame()) { + if (NumBytes != 0) + emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); + } else { + // Unwind MBBI to point to first LDR / FLDD. + const unsigned *CSRegs = getCalleeSavedRegs(); + if (MBBI != MBB.begin()) { + do + --MBBI; + while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); + if (!isCSRestore(MBBI, CSRegs)) + ++MBBI; + } + + // Move SP to start of FP callee save spill area. + NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + + AFI->getGPRCalleeSavedArea2Size() + + AFI->getDPRCalleeSavedAreaSize()); + + if (hasFP(MF)) { + NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; + // Reset SP based on frame pointer only if the stack frame extends beyond + // frame pointer stack slot or target is ELF and the function has FP. + if (NumBytes) + emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes, + TII, *this, dl); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::SP) + .addReg(FramePtr); + } else { + if (MBBI->getOpcode() == ARM::tBX_RET && + &MBB.front() != MBBI && + prior(MBBI)->getOpcode() == ARM::tPOP) { + MachineBasicBlock::iterator PMBBI = prior(MBBI); + emitSPUpdate(MBB, PMBBI, TII, dl, *this, NumBytes); + } else + emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); + } + } + + if (VARegSaveSize) { + // Epilogue for vararg functions: pop LR to R3 and branch off it. + // FIXME: Verify this is still ok when R3 is no longer being reserved. + BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)).addReg(ARM::R3); + + emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize); + + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3); + MBB.erase(MBBI); + } +} diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h new file mode 100644 index 0000000..d379c31 --- /dev/null +++ b/lib/Target/ARM/Thumb2RegisterInfo.h @@ -0,0 +1,60 @@ +//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Thumb-2 implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef THUMB2REGISTERINFO_H +#define THUMB2REGISTERINFO_H + +#include "ARM.h" +#include "ARMRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +namespace llvm { + class ARMSubtarget; + class TargetInstrInfo; + class Type; + +struct Thumb2RegisterInfo : public ARMBaseRegisterInfo { +public: + Thumb2RegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); + + /// emitLoadConstPool - Emits a load from constpool to materialize the + /// specified immediate. + void emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, int Val, + const TargetInstrInfo *TII, + DebugLoc dl) const; + + /// Code Generation virtual methods... + const TargetRegisterClass * + getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const; + + bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; + + bool requiresRegisterScavenging(const MachineFunction &MF) const; + + bool hasReservedCallFrame(MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; + + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; +}; +} + +#endif // THUMB2REGISTERINFO_H diff --git a/lib/Target/ARM/ThumbInstrInfo.h b/lib/Target/ARM/ThumbInstrInfo.h deleted file mode 100644 index dcf1095..0000000 --- a/lib/Target/ARM/ThumbInstrInfo.h +++ /dev/null @@ -1,85 +0,0 @@ -//===- ThumbInstrInfo.h - Thumb Instruction Information ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the ARM implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef THUMBINSTRUCTIONINFO_H -#define THUMBINSTRUCTIONINFO_H - -#include "llvm/Target/TargetInstrInfo.h" -#include "ARMRegisterInfo.h" -#include "ARM.h" -#include "ARMInstrInfo.h" - -namespace llvm { - class ARMSubtarget; - -class ThumbInstrInfo : public ARMBaseInstrInfo { -public: - explicit ThumbInstrInfo(const ARMSubtarget &STI); - - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - virtual unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; - - virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl<MachineOperand> &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; - - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; - - virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl<MachineOperand> &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; - virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; - virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { - return 0; - } - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const; -}; -} - -#endif // THUMBINSTRUCTIONINFO_H diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h index 2815176..0818e25 100644 --- a/lib/Target/Alpha/Alpha.h +++ b/lib/Target/Alpha/Alpha.h @@ -27,7 +27,6 @@ namespace llvm { FunctionPass *createAlphaISelDag(AlphaTargetMachine &TM); FunctionPass *createAlphaCodePrinterPass(raw_ostream &OS, TargetMachine &TM, - CodeGenOpt::Level OptLevel, bool Verbose); FunctionPass *createAlphaPatternInstructionSelector(TargetMachine &TM); FunctionPass *createAlphaCodeEmitterPass(AlphaTargetMachine &TM, diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 1be1713..fa0b656 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -181,6 +181,11 @@ const char *AlphaTargetLowering::getTargetNodeName(unsigned Opcode) const { } } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned AlphaTargetLowering::getFunctionAlignment(const Function *F) const { + return 4; +} + static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) { MVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); @@ -360,7 +365,8 @@ static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) { std::pair<SDValue, SDValue> AlphaTargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, - bool isInreg, unsigned CallingConv, + bool isInreg, unsigned NumFixedArgs, + unsigned CallingConv, bool isTailCall, SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) { diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h index fdd817c..4925367 100644 --- a/lib/Target/Alpha/AlphaISelLowering.h +++ b/lib/Target/Alpha/AlphaISelLowering.h @@ -86,9 +86,9 @@ namespace llvm { /// actual call. virtual std::pair<SDValue, SDValue> LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt, - bool isVarArg, bool isInreg, unsigned CC, bool isTailCall, - SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, - DebugLoc dl); + bool isVarArg, bool isInreg, unsigned NumFixedArgs, unsigned CC, + bool isTailCall, SDValue Callee, ArgListTy &Args, + SelectionDAG &DAG, DebugLoc dl); ConstraintType getConstraintType(const std::string &Constraint) const; @@ -103,6 +103,9 @@ namespace llvm { virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + /// getFunctionAlignment - Return the Log2 alignment of this function. + virtual unsigned getFunctionAlignment(const Function *F) const; + private: // Helpers for custom lowering. void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr, diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp index 229f9d4..76a594f 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.cpp +++ b/lib/Target/Alpha/AlphaInstrInfo.cpp @@ -289,19 +289,22 @@ MachineInstr *AlphaInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (Ops[0] == 0) { // move -> store unsigned InReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); Opc = (Opc == Alpha::BISr) ? Alpha::STQ : ((Opc == Alpha::CPYSS) ? Alpha::STS : Alpha::STT); NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(InReg, getKillRegState(isKill)) + .addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef)) .addFrameIndex(FrameIndex) .addReg(Alpha::F31); } else { // load -> move unsigned OutReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); Opc = (Opc == Alpha::BISr) ? Alpha::LDQ : ((Opc == Alpha::CPYSS) ? Alpha::LDS : Alpha::LDT); NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(OutReg, RegState::Define | getDeadRegState(isDead)) + .addReg(OutReg, RegState::Define | getDeadRegState(isDead) | + getUndefRegState(isUndef)) .addFrameIndex(FrameIndex) .addReg(Alpha::F31); } @@ -470,6 +473,7 @@ unsigned AlphaInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Alpha::R29, &Alpha::GPRCRegClass, &Alpha::GPRCRegClass); assert(Ok && "Couldn't assign to global base register!"); + Ok = Ok; // Silence warning when assertions are turned off. RegInfo.addLiveIn(Alpha::R29); AlphaFI->setGlobalBaseReg(GlobalBaseReg); @@ -496,6 +500,7 @@ unsigned AlphaInstrInfo::getGlobalRetAddr(MachineFunction *MF) const { bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalRetAddr, Alpha::R26, &Alpha::GPRCRegClass, &Alpha::GPRCRegClass); assert(Ok && "Couldn't assign to global return address register!"); + Ok = Ok; // Silence warning when assertions are turned off. RegInfo.addLiveIn(Alpha::R26); AlphaFI->setGlobalRetAddr(GlobalRetAddr); diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp index 10952eb..060089c 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.cpp +++ b/lib/Target/Alpha/AlphaTargetMachine.cpp @@ -94,7 +94,7 @@ bool AlphaTargetMachine::addAssemblyEmitter(PassManagerBase &PM, // Output assembly language. assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + PM.add(AsmPrinterCtor(Out, *this, Verbose)); return false; } bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM, @@ -104,7 +104,7 @@ bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; } @@ -115,7 +115,7 @@ bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; } diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h index 946ca55..26684c7 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.h +++ b/lib/Target/Alpha/AlphaTargetMachine.h @@ -41,7 +41,6 @@ protected: // set this functions to ctor pointer at startup time if they are linked in. typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); static AsmPrinterCtorFn AsmPrinterCtor; diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp index e0c0a64..982ef5e 100644 --- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp +++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp @@ -38,9 +38,8 @@ namespace { /// explicit AlphaAsmPrinter(raw_ostream &o, TargetMachine &tm, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(o, tm, T, OL, V) {} + const TargetAsmInfo *T, bool V) + : AsmPrinter(o, tm, T, V) {} virtual const char *getPassName() const { return "Alpha Assembly Printer"; @@ -70,9 +69,8 @@ namespace { /// FunctionPass *llvm::createAlphaCodePrinterPass(raw_ostream &o, TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { - return new AlphaAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); + return new AlphaAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } #include "AlphaGenAsmWriter.inc" @@ -155,7 +153,7 @@ bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) { const Function *F = MF.getFunction(); SwitchToSection(TAI->SectionForGlobal(F)); - EmitAlignment(4, F); + EmitAlignment(MF.getAlignment(), F); switch (F->getLinkage()) { default: assert(0 && "Unknown linkage type!"); case Function::InternalLinkage: // Symbols default to internal. diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt index 1e535f7..2a382d5 100644 --- a/lib/Target/Alpha/CMakeLists.txt +++ b/lib/Target/Alpha/CMakeLists.txt @@ -23,3 +23,5 @@ add_llvm_target(AlphaCodeGen AlphaTargetAsmInfo.cpp AlphaTargetMachine.cpp ) + +target_link_libraries (LLVMAlphaCodeGen LLVMSelectionDAG) diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp index 02b625b..2847d0b 100644 --- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp @@ -50,9 +50,8 @@ namespace { std::set<std::string> FnStubs, GVStubs; public: explicit SPUAsmPrinter(raw_ostream &O, TargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) : - AsmPrinter(O, TM, T, OL, V) {} + const TargetAsmInfo *T, bool V) : + AsmPrinter(O, TM, T, V) {} virtual const char *getPassName() const { return "STI CBEA SPU Assembly Printer"; @@ -290,9 +289,8 @@ namespace { DwarfWriter *DW; public: explicit LinuxAsmPrinter(raw_ostream &O, SPUTargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level F, - bool V) - : SPUAsmPrinter(O, TM, T, F, V), DW(0) {} + const TargetAsmInfo *T, bool V) + : SPUAsmPrinter(O, TM, T, V), DW(0) {} virtual const char *getPassName() const { return "STI CBEA SPU Assembly Printer"; @@ -433,7 +431,7 @@ LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) const Function *F = MF.getFunction(); SwitchToSection(TAI->SectionForGlobal(F)); - EmitAlignment(3, F); + EmitAlignment(MF.getAlignment(), F); switch (F->getLinkage()) { default: assert(0 && "Unknown linkage type!"); @@ -603,9 +601,8 @@ bool LinuxAsmPrinter::doFinalization(Module &M) { /// FunctionPass *llvm::createSPUAsmPrinterPass(raw_ostream &o, SPUTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { - return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); + return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } // Force static initialization. diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt index e3e12ac..8a55845 100644 --- a/lib/Target/CellSPU/CMakeLists.txt +++ b/lib/Target/CellSPU/CMakeLists.txt @@ -22,3 +22,5 @@ add_llvm_target(CellSPUCodeGen SPUTargetAsmInfo.cpp SPUTargetMachine.cpp ) + +target_link_libraries (LLVMCellSPUCodeGen LLVMSelectionDAG) diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h index 77a062e..10d1110 100644 --- a/lib/Target/CellSPU/SPU.h +++ b/lib/Target/CellSPU/SPU.h @@ -26,7 +26,6 @@ namespace llvm { FunctionPass *createSPUISelDag(SPUTargetMachine &TM); FunctionPass *createSPUAsmPrinterPass(raw_ostream &o, SPUTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); /*--== Utility functions/predicates/etc used all over the place: --==*/ diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index b286443..d8a7776 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -113,7 +113,7 @@ namespace { const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT(); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, - CallingConv::C, false, Callee, Args, DAG, + 0, CallingConv::C, false, Callee, Args, DAG, Op.getDebugLoc()); return CallInfo.first; @@ -481,6 +481,11 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const return ((i != node_names.end()) ? i->second : 0); } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const { + return 3; +} + //===----------------------------------------------------------------------===// // Return the Cell SPU's SETCC result type //===----------------------------------------------------------------------===// diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index 866c632..b1583f4 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -89,7 +89,6 @@ namespace llvm { public TargetLowering { int VarArgsFrameIndex; // FrameIndex for start of varargs area. - int ReturnAddrIndex; // FrameIndex for return slot. SPUTargetMachine &SPUTM; public: @@ -148,6 +147,9 @@ namespace llvm { virtual bool isLegalAddressImmediate(GlobalValue *) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + + /// getFunctionAlignment - Return the Log2 alignment of this function. + virtual unsigned getFunctionAlignment(const Function *F) const; }; } diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 4af995a..e629c8d 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -491,19 +491,22 @@ SPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (OpNum == 0) { // move -> store unsigned InReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); if (FrameIndex < SPUFrameInfo::maxFrameOffset()) { MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(SPU::STQDr32)); - MIB.addReg(InReg, getKillRegState(isKill)); + MIB.addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef)); NewMI = addFrameReference(MIB, FrameIndex); } } else { // move -> load unsigned OutReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)); - MIB.addReg(OutReg, RegState::Define | getDeadRegState(isDead)); + MIB.addReg(OutReg, RegState::Define | getDeadRegState(isDead) | + getUndefRegState(isUndef)); Opc = (FrameIndex < SPUFrameInfo::maxFrameOffset()) ? SPU::STQDr32 : SPU::STQXr32; NewMI = addFrameReference(MIB, FrameIndex); diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index 256d63d..2470972 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -94,6 +94,6 @@ bool SPUTargetMachine::addAssemblyEmitter(PassManagerBase &PM, // Output assembly language. assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + PM.add(AsmPrinterCtor(Out, *this, Verbose)); return false; } diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index d8fe300..4c28521 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -43,7 +43,6 @@ protected: // set this functions to ctor pointer at startup time if they are linked in. typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, SPUTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); static AsmPrinterCtorFn AsmPrinterCtor; diff --git a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp index 6b34a4e..d85c0ea 100644 --- a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp +++ b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp @@ -38,9 +38,8 @@ namespace { std::set<std::string> ExternalFunctionNames, ExternalObjectNames; public: explicit IA64AsmPrinter(raw_ostream &O, TargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V) {} + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V) {} virtual const char *getPassName() const { return "IA64 Assembly Printer"; @@ -137,7 +136,7 @@ bool IA64AsmPrinter::runOnMachineFunction(MachineFunction &MF) { SwitchToSection(TAI->SectionForGlobal(F)); // Print out labels for the function. - EmitAlignment(5); + EmitAlignment(MF.getAlignment()); O << "\t.global\t" << CurrentFnName << '\n'; printVisibility(CurrentFnName, F->getVisibility()); @@ -373,9 +372,8 @@ bool IA64AsmPrinter::doFinalization(Module &M) { /// FunctionPass *llvm::createIA64CodePrinterPass(raw_ostream &o, IA64TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { - return new IA64AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); + return new IA64AsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } namespace { diff --git a/lib/Target/IA64/CMakeLists.txt b/lib/Target/IA64/CMakeLists.txt index 26f86ca..638ed2e 100644 --- a/lib/Target/IA64/CMakeLists.txt +++ b/lib/Target/IA64/CMakeLists.txt @@ -18,3 +18,5 @@ add_llvm_target(IA64CodeGen IA64TargetAsmInfo.cpp IA64TargetMachine.cpp ) + +target_link_libraries (LLVMIA64CodeGen LLVMSelectionDAG) diff --git a/lib/Target/IA64/IA64.h b/lib/Target/IA64/IA64.h index ec8e3d6..9c758fd 100644 --- a/lib/Target/IA64/IA64.h +++ b/lib/Target/IA64/IA64.h @@ -39,7 +39,6 @@ FunctionPass *createIA64BundlingPass(IA64TargetMachine &TM); /// FunctionPass *createIA64CodePrinterPass(raw_ostream &o, IA64TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); } // End llvm namespace diff --git a/lib/Target/IA64/IA64ISelLowering.cpp b/lib/Target/IA64/IA64ISelLowering.cpp index c545b9c..c622345 100644 --- a/lib/Target/IA64/IA64ISelLowering.cpp +++ b/lib/Target/IA64/IA64ISelLowering.cpp @@ -148,6 +148,11 @@ MVT IA64TargetLowering::getSetCCResultType(MVT VT) const { return MVT::i1; } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned IA64TargetLowering::getFunctionAlignment(const Function *) const { + return 5; +} + void IA64TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, SmallVectorImpl<SDValue> &ArgValues, DebugLoc dl) { @@ -310,7 +315,8 @@ void IA64TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, std::pair<SDValue, SDValue> IA64TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, - bool isInreg, unsigned CallingConv, + bool isInreg, unsigned NumFixedArgs, + unsigned CallingConv, bool isTailCall, SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) { diff --git a/lib/Target/IA64/IA64ISelLowering.h b/lib/Target/IA64/IA64ISelLowering.h index edf7eb8..b9c8bf2 100644 --- a/lib/Target/IA64/IA64ISelLowering.h +++ b/lib/Target/IA64/IA64ISelLowering.h @@ -62,7 +62,7 @@ namespace llvm { virtual std::pair<SDValue, SDValue> LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg, - unsigned CC, bool isTailCall, + unsigned NumFixedArgs, unsigned CC, bool isTailCall, SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl); @@ -70,6 +70,8 @@ namespace llvm { /// (currently, only "ret void") virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + /// getFunctionAlignment - Return the Log2 alignment of this function. + virtual unsigned getFunctionAlignment(const Function *F) const; }; } diff --git a/lib/Target/IA64/IA64TargetMachine.cpp b/lib/Target/IA64/IA64TargetMachine.cpp index 4b05e1d..71a0a98 100644 --- a/lib/Target/IA64/IA64TargetMachine.cpp +++ b/lib/Target/IA64/IA64TargetMachine.cpp @@ -73,7 +73,7 @@ IA64TargetMachine::IA64TargetMachine(const Module &M, const std::string &FS) //===----------------------------------------------------------------------===// bool IA64TargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel){ + CodeGenOpt::Level OptLevel) { PM.add(createIA64DAGToDAGInstructionSelector(*this)); return false; } @@ -91,7 +91,7 @@ bool IA64TargetMachine::addAssemblyEmitter(PassManagerBase &PM, // Output assembly language. assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + PM.add(AsmPrinterCtor(Out, *this, Verbose)); return false; } diff --git a/lib/Target/IA64/IA64TargetMachine.h b/lib/Target/IA64/IA64TargetMachine.h index a64da9f..402d833 100644 --- a/lib/Target/IA64/IA64TargetMachine.h +++ b/lib/Target/IA64/IA64TargetMachine.h @@ -38,7 +38,6 @@ protected: // set this functions to ctor pointer at startup time if they are linked in. typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, IA64TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); static AsmPrinterCtorFn AsmPrinterCtor; diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h index ed0cd04..fc13c9e 100644 --- a/lib/Target/MSP430/MSP430.h +++ b/lib/Target/MSP430/MSP430.h @@ -26,7 +26,6 @@ namespace llvm { CodeGenOpt::Level OptLevel); FunctionPass *createMSP430CodePrinterPass(raw_ostream &o, MSP430TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); } // end namespace llvm; diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp index 71b785b..b1fa3f0 100644 --- a/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -40,9 +40,8 @@ namespace { class VISIBILITY_HIDDEN MSP430AsmPrinter : public AsmPrinter { public: MSP430AsmPrinter(raw_ostream &O, MSP430TargetMachine &TM, - const TargetAsmInfo *TAI, - CodeGenOpt::Level OL, bool V) - : AsmPrinter(O, TM, TAI, OL, V) {} + const TargetAsmInfo *TAI, bool V) + : AsmPrinter(O, TM, TAI, V) {} virtual const char *getPassName() const { return "MSP430 Assembly Printer"; @@ -77,9 +76,8 @@ namespace { /// FunctionPass *llvm::createMSP430CodePrinterPass(raw_ostream &o, MSP430TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { - return new MSP430AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); + return new MSP430AsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } bool MSP430AsmPrinter::doInitialization(Module &M) { @@ -97,10 +95,7 @@ void MSP430AsmPrinter::emitFunctionHeader(const MachineFunction &MF) { SwitchToSection(TAI->SectionForGlobal(F)); - unsigned FnAlign = 4; - if (F->hasFnAttr(Attribute::OptimizeForSize)) - FnAlign = 1; - + unsigned FnAlign = MF.getAlignment(); EmitAlignment(FnAlign, F); switch (F->getLinkage()) { diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 14db20e..91a8663 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -127,6 +127,11 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { } } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned MSP430TargetLowering::getFunctionAlignment(const Function *F) const { + return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 4; +} + //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 404534d..4a90a0e 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -74,6 +74,9 @@ namespace llvm { /// DAG node. virtual const char *getTargetNodeName(unsigned Opcode) const; + /// getFunctionAlignment - Return the Log2 alignment of this function. + virtual unsigned getFunctionAlignment(const Function *F) const; + SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG); SDValue LowerCALL(SDValue Op, SelectionDAG &DAG); SDValue LowerRET(SDValue Op, SelectionDAG &DAG); diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index ef6f997..d40bac7 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -350,6 +350,7 @@ unsigned MSP430RegisterInfo::getFrameRegister(MachineFunction &MF) const { int MSP430RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { assert(0 && "Not implemented yet!"); + return 0; } #include "MSP430GenRegisterInfo.inc" diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index b1fe758..dd09d43 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -62,7 +62,7 @@ bool MSP430TargetMachine::addAssemblyEmitter(PassManagerBase &PM, bool Verbose, raw_ostream &Out) { // Output assembly language. - PM.add(createMSP430CodePrinterPass(Out, *this, OptLevel, Verbose)); + PM.add(createMSP430CodePrinterPass(Out, *this, Verbose)); return false; } diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp index 431630b..cb40479 100644 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -51,9 +51,8 @@ namespace { const MipsSubtarget *Subtarget; public: explicit MipsAsmPrinter(raw_ostream &O, MipsTargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V) { + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V) { Subtarget = &TM.getSubtarget<MipsSubtarget>(); } @@ -93,9 +92,8 @@ namespace { /// regardless of whether the function is in SSA form. FunctionPass *llvm::createMipsCodePrinterPass(raw_ostream &o, MipsTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { - return new MipsAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); + return new MipsAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } //===----------------------------------------------------------------------===// @@ -230,7 +228,7 @@ emitFunctionStart(MachineFunction &MF) SwitchToSection(TAI->SectionForGlobal(F)); // 2 bits aligned - EmitAlignment(2, F); + EmitAlignment(MF.getAlignment(), F); O << "\t.globl\t" << CurrentFnName << '\n'; O << "\t.ent\t" << CurrentFnName << '\n'; diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 70c7a51..d27e6f1 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -20,3 +20,5 @@ add_llvm_target(MipsCodeGen MipsTargetAsmInfo.cpp MipsTargetMachine.cpp ) + +target_link_libraries (LLVMMipsCodeGen LLVMSelectionDAG) diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index 0accb4e..9b22a91 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -27,7 +27,6 @@ namespace llvm { FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM); FunctionPass *createMipsCodePrinterPass(raw_ostream &OS, MipsTargetMachine &TM, - CodeGenOpt::Level OptLevel, bool Verbose); } // end namespace llvm; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 42afceb..3d2e2b7 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -154,11 +154,14 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM) computeRegisterProperties(); } - MVT MipsTargetLowering::getSetCCResultType(MVT VT) const { return MVT::i32; } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned MipsTargetLowering::getFunctionAlignment(const Function *) const { + return 2; +} SDValue MipsTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 55cd6ea..9ad4895 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -68,8 +68,6 @@ namespace llvm { //===--------------------------------------------------------------------===// class MipsTargetLowering : public TargetLowering { - // FrameIndex for return slot. - int ReturnAddrIndex; public: explicit MipsTargetLowering(MipsTargetMachine &TM); @@ -84,6 +82,8 @@ namespace llvm { /// getSetCCResultType - get the ISD::SETCC result ValueType MVT getSetCCResultType(MVT VT) const; + /// getFunctionAlignment - Return the Log2 alignment of this function. + virtual unsigned getFunctionAlignment(const Function *F) const; private: // Subtarget Info const MipsSubtarget *Subtarget; diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 92af973..e16fd8e 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -291,14 +291,17 @@ foldMemoryOperandImpl(MachineFunction &MF, if (Ops[0] == 0) { // COPY -> STORE unsigned SrcReg = MI->getOperand(2).getReg(); bool isKill = MI->getOperand(2).isKill(); + bool isUndef = MI->getOperand(2).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::SW)) - .addReg(SrcReg, getKillRegState(isKill)) + .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) .addImm(0).addFrameIndex(FI); } else { // COPY -> LOAD unsigned DstReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::LW)) - .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | + getUndefRegState(isUndef)) .addImm(0).addFrameIndex(FI); } } @@ -321,14 +324,17 @@ foldMemoryOperandImpl(MachineFunction &MF, if (Ops[0] == 0) { // COPY -> STORE unsigned SrcReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(2).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(StoreOpc)) - .addReg(SrcReg, getKillRegState(isKill)) + .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) .addImm(0).addFrameIndex(FI) ; } else { // COPY -> LOAD unsigned DstReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(LoadOpc)) - .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | + getUndefRegState(isUndef)) .addImm(0).addFrameIndex(FI); } } @@ -645,6 +651,7 @@ unsigned MipsInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { Mips::CPURegsRegisterClass, Mips::CPURegsRegisterClass); assert(Ok && "Couldn't assign to global base register!"); + Ok = Ok; // Silence warning when assertions are turned off. RegInfo.addLiveIn(Mips::GP); MipsFI->setGlobalBaseReg(GlobalBaseReg); diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index c5f117b..4675536 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -134,6 +134,6 @@ addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, bool Verbose, raw_ostream &Out) { // Output assembly language. assert(AsmPrinterCtor && "AsmPrinter was not linked in"); - PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + PM.add(AsmPrinterCtor(Out, *this, Verbose)); return false; } diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 85fafad..95e5be4 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -39,7 +39,6 @@ namespace llvm { // linked in. typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, MipsTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); static AsmPrinterCtorFn AsmPrinterCtor; diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h index cf0f9db..7940648 100644 --- a/lib/Target/PIC16/PIC16.h +++ b/lib/Target/PIC16/PIC16.h @@ -151,6 +151,7 @@ namespace PIC16CC { return STATIC_LOCAL; assert (0 && "Could not determine Symbol's tag"); + return PREFIX_SYMBOL; // Silence warning when assertions are turned off. } // addPrefix - add prefix symbol to a name if there isn't one already. @@ -331,7 +332,6 @@ namespace PIC16CC { FunctionPass *createPIC16ISelDag(PIC16TargetMachine &TM); FunctionPass *createPIC16CodePrinterPass(raw_ostream &OS, PIC16TargetMachine &TM, - CodeGenOpt::Level OptLevel, bool Verbose); // Banksel optimzer pass. FunctionPass *createPIC16MemSelOptimizerPass(); diff --git a/lib/Target/PIC16/PIC16AsmPrinter.cpp b/lib/Target/PIC16/PIC16AsmPrinter.cpp index ca1089b..1fc1cc1 100644 --- a/lib/Target/PIC16/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/PIC16AsmPrinter.cpp @@ -113,9 +113,8 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { /// FunctionPass *llvm::createPIC16CodePrinterPass(raw_ostream &o, PIC16TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { - return new PIC16AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); + return new PIC16AsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } diff --git a/lib/Target/PIC16/PIC16AsmPrinter.h b/lib/Target/PIC16/PIC16AsmPrinter.h index 3ec5659..cb11687 100644 --- a/lib/Target/PIC16/PIC16AsmPrinter.h +++ b/lib/Target/PIC16/PIC16AsmPrinter.h @@ -30,9 +30,8 @@ namespace llvm { struct VISIBILITY_HIDDEN PIC16AsmPrinter : public AsmPrinter { explicit PIC16AsmPrinter(raw_ostream &O, PIC16TargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V), DbgInfo(O, T) { + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V), DbgInfo(O, T) { PTLI = TM.getTargetLowering(); PTAI = static_cast<const PIC16TargetAsmInfo *> (T); } diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index ec1db90..0d24f61 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -399,7 +399,7 @@ PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, const Type *RetTy = RetVT.getTypeForMVT(); std::pair<SDValue,SDValue> CallInfo = LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, CallingConv::C, false, Callee, Args, DAG, dl); + false, 0, CallingConv::C, false, Callee, Args, DAG, dl); return CallInfo.first; } @@ -841,12 +841,16 @@ SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) { // i.e. without any extension MVT MemVT = LD->getMemoryVT(); unsigned MemBytes = MemVT.getSizeInBits() / 8; + // if MVT::i1 is extended to MVT::i8 then MemBytes will be zero + // So set it to one + if (MemBytes == 0) MemBytes = 1; + unsigned ExtdBytes = VT.getSizeInBits() / 8; Offset = DAG.getConstant(LoadOffset, MVT::i8); Tys = DAG.getVTList(MVT::i8, MVT::Other); // For MemBytes generate PIC16Load with proper offset - for (iter=0; iter<MemBytes; ++iter) { + for (iter=0; iter < MemBytes; ++iter) { // Add the pointer offset if any Offset = DAG.getConstant(iter + LoadOffset, MVT::i8); Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Chain, PtrLo, PtrHi, @@ -1302,7 +1306,8 @@ SDValue PIC16TargetLowering::LegalizeCALL(SDValue Op, SelectionDAG &DAG) { // Generate new call with all the operands legal return DAG.getCall(TheCall->getCallingConv(), dl, TheCall->isVarArg(), TheCall->isTailCall(), - TheCall->isInreg(), VTs, &Ops[0], Ops.size()); + TheCall->isInreg(), VTs, &Ops[0], Ops.size(), + TheCall->getNumFixedArgs()); } void PIC16TargetLowering:: diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h index ca9650d..b40ea12 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.h +++ b/lib/Target/PIC16/PIC16ISelLowering.h @@ -145,6 +145,11 @@ namespace llvm { unsigned GetTmpSize() { return TmpSize; } void SetTmpSize(unsigned Size) { TmpSize = Size; } + /// getFunctionAlignment - Return the Log2 alignment of this function. + virtual unsigned getFunctionAlignment(const Function *) const { + // FIXME: The function never seems to be aligned. + return 1; + } private: // If the Node is a BUILD_PAIR representing a direct Address, // then this function will return true. diff --git a/lib/Target/PIC16/PIC16InstrInfo.td b/lib/Target/PIC16/PIC16InstrInfo.td index 7557716..a054bdc 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.td +++ b/lib/Target/PIC16/PIC16InstrInfo.td @@ -299,7 +299,7 @@ def store_indirect : // Direct load. // Input Operands are: ptrlo = GA, offset = offset, ptrhi = banksel. // Output: dst = W -let mayLoad = 1 in +let Defs = [STATUS], mayLoad = 1 in class MOVF_INSN<bits<6> OpCode, SDNode OpNodeSrc, SDNode Op>: ByteFormat<0, (outs GPR:$dst), (ins i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi), diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp index 4304732..77ad188 100644 --- a/lib/Target/PIC16/PIC16TargetMachine.cpp +++ b/lib/Target/PIC16/PIC16TargetMachine.cpp @@ -65,11 +65,11 @@ bool PIC16TargetMachine::addInstSelector(PassManagerBase &PM, return false; } -bool PIC16TargetMachine:: -addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool Verbose, raw_ostream &Out) { +bool PIC16TargetMachine::addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, raw_ostream &Out) { // Output assembly language. - PM.add(createPIC16CodePrinterPass(Out, *this, OptLevel, Verbose)); + PM.add(createPIC16CodePrinterPass(Out, *this, Verbose)); return false; } diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index c5aa6ae..7f1673c 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -56,9 +56,8 @@ namespace { const PPCSubtarget &Subtarget; public: explicit PPCAsmPrinter(raw_ostream &O, TargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V), + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V), Subtarget(TM.getSubtarget<PPCSubtarget>()) {} virtual const char *getPassName() const { @@ -189,8 +188,7 @@ namespace { if (TM.getRelocationModel() != Reloc::Static) { if (MO.getType() == MachineOperand::MO_GlobalAddress) { GlobalValue *GV = MO.getGlobal(); - if (((GV->isDeclaration() || GV->hasWeakLinkage() || - GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) { + if (GV->isDeclaration() || GV->isWeakForLinker()) { // Dynamically-resolved functions need a stub for the function. std::string Name = Mang->getValueName(GV); FnStubs.insert(Name); @@ -296,9 +294,8 @@ namespace { class VISIBILITY_HIDDEN PPCLinuxAsmPrinter : public PPCAsmPrinter { public: explicit PPCLinuxAsmPrinter(raw_ostream &O, PPCTargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : PPCAsmPrinter(O, TM, T, OL, V){} + const TargetAsmInfo *T, bool V) + : PPCAsmPrinter(O, TM, T, V){} virtual const char *getPassName() const { return "Linux PPC Assembly Printer"; @@ -323,9 +320,8 @@ namespace { raw_ostream &OS; public: explicit PPCDarwinAsmPrinter(raw_ostream &O, PPCTargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : PPCAsmPrinter(O, TM, T, OL, V), OS(O) {} + const TargetAsmInfo *T, bool V) + : PPCAsmPrinter(O, TM, T, V), OS(O) {} virtual const char *getPassName() const { return "Darwin PPC Assembly Printer"; @@ -387,11 +383,12 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) { if (TM.getRelocationModel() != Reloc::Static) { if (GV->isDeclaration() || GV->isWeakForLinker()) { if (GV->hasHiddenVisibility()) { - if (!GV->isDeclaration() && !GV->hasCommonLinkage()) - O << Name; - else { + if (GV->isDeclaration() || GV->hasCommonLinkage() || + GV->hasAvailableExternallyLinkage()) { HiddenGVStubs.insert(Name); printSuffixedName(Name, "$non_lazy_ptr"); + } else { + O << Name; } } else { GVStubs.insert(Name); @@ -596,7 +593,7 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) { printVisibility(CurrentFnName, F->getVisibility()); - EmitAlignment(2, F); + EmitAlignment(MF.getAlignment(), F); O << CurrentFnName << ":\n"; // Emit pre-function debug information. @@ -773,7 +770,7 @@ bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) { printVisibility(CurrentFnName, F->getVisibility()); - EmitAlignment(F->hasFnAttr(Attribute::OptimizeForSize) ? 2 : 4, F); + EmitAlignment(MF.getAlignment(), F); O << CurrentFnName << ":\n"; // Emit pre-function debug information. @@ -1119,16 +1116,13 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { /// FunctionPass *llvm::createPPCAsmPrinterPass(raw_ostream &o, PPCTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>(); if (Subtarget->isDarwin()) { - return new PPCDarwinAsmPrinter(o, tm, tm.getTargetAsmInfo(), - OptLevel, verbose); + return new PPCDarwinAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } else { - return new PPCLinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), - OptLevel, verbose); + return new PPCLinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } } diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 0b67aff..a6479d8 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -26,3 +26,5 @@ add_llvm_target(PowerPCCodeGen PPCTargetAsmInfo.cpp PPCTargetMachine.cpp ) + +target_link_libraries (LLVMPowerPCCodeGen LLVMSelectionDAG) diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index c844e21..f6c3469 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -28,9 +28,8 @@ namespace llvm { FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); -FunctionPass *createPPCAsmPrinterPass(raw_ostream &OS, - PPCTargetMachine &TM, - CodeGenOpt::Level OptLevel, bool Verbose); +FunctionPass *createPPCAsmPrinterPass(raw_ostream &OS, PPCTargetMachine &TM, + bool Verbose); FunctionPass *createPPCCodeEmitterPass(PPCTargetMachine &TM, MachineCodeEmitter &MCE); FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index 9f916f3..c7ce171 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -43,10 +43,8 @@ def CC_PPC : CallingConv<[ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>, // Common sub-targets passes FP values in F1 - F13 - CCIfType<[f32, f64], CCIfSubtarget<"isMachoABI()", - CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>>, - // ELF32 sub-target pass FP values in F1 - F8. - CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + CCIfType<[f32, f64], + CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>, // The first 12 Vector arguments are passed in altivec registers. CCIfType<[v16i8, v8i16, v4i32, v4f32], @@ -64,3 +62,86 @@ def CC_PPC : CallingConv<[ */ +//===----------------------------------------------------------------------===// +// PowerPC System V Release 4 ABI +//===----------------------------------------------------------------------===// + +// _Complex arguments are never split, thus their two scalars are either +// passed both in argument registers or both on the stack. Also _Complex +// arguments are always passed in general purpose registers, never in +// Floating-point registers or vector registers. Arguments which should go +// on the stack are marked with the inreg parameter attribute. +// Giving inreg this target-dependent (and counter-intuitive) meaning +// simplifies things, because functions calls are not always coming from the +// frontend but are also created implicitly e.g. for libcalls. If inreg would +// actually mean that the argument is passed in a register, then all places +// which create function calls/function definitions implicitly would need to +// be aware of this fact and would need to mark arguments accordingly. With +// inreg meaning that the argument is passed on the stack, this is not an +// issue, except for calls which involve _Complex types. + +def CC_PPC_SVR4_Common : CallingConv<[ + // The ABI requires i64 to be passed in two adjacent registers with the first + // register having an odd register number. + CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>, + + // The first 8 integer arguments are passed in integer registers. + CCIfType<[i32], CCIf<"!ArgFlags.isInReg()", + CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>, + + // Make sure the i64 words from a long double are either both passed in + // registers or both passed on the stack. + CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignFPArgRegs">>>, + + // FP values are passed in F1 - F8. + CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + + // Split arguments have an alignment of 8 bytes on the stack. + CCIfType<[i32], CCIfSplit<CCAssignToStack<4, 8>>>, + + CCIfType<[i32], CCAssignToStack<4, 4>>, + + // Floats are stored in double precision format, thus they have the same + // alignment and size as doubles. + CCIfType<[f32,f64], CCAssignToStack<8, 8>>, + + // Vectors get 16-byte stack slots that are 16-byte aligned. + CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToStack<16, 16>> +]>; + +// This calling convention puts vector arguments always on the stack. It is used +// to assign vector arguments which belong to the variable portion of the +// parameter list of a variable argument function. +def CC_PPC_SVR4_VarArg : CallingConv<[ + CCDelegateTo<CC_PPC_SVR4_Common> +]>; + +// In contrast to CC_PPC_SVR4_VarArg, this calling convention first tries to put +// vector arguments in vector registers before putting them on the stack. +def CC_PPC_SVR4 : CallingConv<[ + // The first 12 Vector arguments are passed in AltiVec registers. + CCIfType<[v16i8, v8i16, v4i32, v4f32], + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>, + + CCDelegateTo<CC_PPC_SVR4_Common> +]>; + +// Helper "calling convention" to handle aggregate by value arguments. +// Aggregate by value arguments are always placed in the local variable space +// of the caller. This calling convention is only used to assign those stack +// offsets in the callers stack frame. +// +// Still, the address of the aggregate copy in the callers stack frame is passed +// in a GPR (or in the parameter list area if all GPRs are allocated) from the +// caller to the callee. The location for the address argument is assigned by +// the CC_PPC_SVR4 calling convention. +// +// The only purpose of CC_PPC_SVR4_Custom_Dummy is to skip arguments which are +// not passed by value. + +def CC_PPC_SVR4_ByVal : CallingConv<[ + CCIfByVal<CCPassByVal<4, 4>>, + + CCCustom<"CC_PPC_SVR4_Custom_Dummy"> +]>; + diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index aa3dce1..cd6018d 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -163,8 +163,8 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, } else if (MO.isGlobal() || MO.isSymbol() || MO.isCPI() || MO.isJTI()) { unsigned Reloc = 0; - if (MI.getOpcode() == PPC::BL_Macho || MI.getOpcode() == PPC::BL8_Macho || - MI.getOpcode() == PPC::BL_ELF || MI.getOpcode() == PPC::BL8_ELF || + if (MI.getOpcode() == PPC::BL_Darwin || MI.getOpcode() == PPC::BL8_Darwin || + MI.getOpcode() == PPC::BL_SVR4 || MI.getOpcode() == PPC::BL8_ELF || MI.getOpcode() == PPC::TAILB || MI.getOpcode() == PPC::TAILB8) Reloc = PPC::reloc_pcrel_bx; else { @@ -246,9 +246,9 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, } else if (MO.isMBB()) { unsigned Reloc = 0; unsigned Opcode = MI.getOpcode(); - if (Opcode == PPC::B || Opcode == PPC::BL_Macho || - Opcode == PPC::BLA_Macho || Opcode == PPC::BL_ELF || - Opcode == PPC::BLA_ELF) + if (Opcode == PPC::B || Opcode == PPC::BL_Darwin || + Opcode == PPC::BLA_Darwin|| Opcode == PPC::BL_SVR4 || + Opcode == PPC::BLA_SVR4) Reloc = PPC::reloc_pcrel_bx; else // BCC instruction Reloc = PPC::reloc_pcrel_bcx; diff --git a/lib/Target/PowerPC/PPCFrameInfo.h b/lib/Target/PowerPC/PPCFrameInfo.h index 1b5893d..770a560 100644 --- a/lib/Target/PowerPC/PPCFrameInfo.h +++ b/lib/Target/PowerPC/PPCFrameInfo.h @@ -14,8 +14,10 @@ #define POWERPC_FRAMEINFO_H #include "PPC.h" +#include "PPCSubtarget.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/STLExtras.h" namespace llvm { @@ -29,63 +31,153 @@ public: /// getReturnSaveOffset - Return the previous frame offset to save the /// return address. - static unsigned getReturnSaveOffset(bool LP64, bool isMacho) { - if (isMacho) + static unsigned getReturnSaveOffset(bool LP64, bool isDarwinABI) { + if (isDarwinABI) return LP64 ? 16 : 8; - // For ELF 32 ABI: + // SVR4 ABI: return 4; } /// getFramePointerSaveOffset - Return the previous frame offset to save the /// frame pointer. - static unsigned getFramePointerSaveOffset(bool LP64, bool isMacho) { - // For MachO ABI: + static unsigned getFramePointerSaveOffset(bool LP64, bool isDarwinABI) { + // For the Darwin ABI: // Use the TOC save slot in the PowerPC linkage area for saving the frame // pointer (if needed.) LLVM does not generate code that uses the TOC (R2 // is treated as a caller saved register.) - if (isMacho) + if (isDarwinABI) return LP64 ? 40 : 20; - // For ELF 32 ABI: + // SVR4 ABI: // Save it right before the link register return -4U; } /// getLinkageSize - Return the size of the PowerPC ABI linkage area. /// - static unsigned getLinkageSize(bool LP64, bool isMacho) { - if (isMacho) + static unsigned getLinkageSize(bool LP64, bool isDarwinABI) { + if (isDarwinABI) return 6 * (LP64 ? 8 : 4); - // For ELF 32 ABI: + // SVR4 ABI: return 8; } /// getMinCallArgumentsSize - Return the size of the minium PowerPC ABI /// argument area. - static unsigned getMinCallArgumentsSize(bool LP64, bool isMacho) { - // For Macho ABI: + static unsigned getMinCallArgumentsSize(bool LP64, bool isDarwinABI) { + // For the Darwin ABI: // The prolog code of the callee may store up to 8 GPR argument registers to // the stack, allowing va_start to index over them in memory if its varargs. // Because we cannot tell if this is needed on the caller side, we have to // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. - if (isMacho) + if (isDarwinABI) return 8 * (LP64 ? 8 : 4); - // For ELF 32 ABI: + // SVR4 ABI: // There is no default stack allocated for the 8 first GPR arguments. return 0; } /// getMinCallFrameSize - Return the minimum size a call frame can be using /// the PowerPC ABI. - static unsigned getMinCallFrameSize(bool LP64, bool isMacho) { + static unsigned getMinCallFrameSize(bool LP64, bool isDarwinABI) { // The call frame needs to be at least big enough for linkage and 8 args. - return getLinkageSize(LP64, isMacho) + - getMinCallArgumentsSize(LP64, isMacho); + return getLinkageSize(LP64, isDarwinABI) + + getMinCallArgumentsSize(LP64, isDarwinABI); + } + + // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. + const std::pair<unsigned, int> * + getCalleeSavedSpillSlots(unsigned &NumEntries) const { + // Early exit if not using the SVR4 ABI. + if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) { + NumEntries = 0; + return 0; + } + + static const std::pair<unsigned, int> Offsets[] = { + // Floating-point register save area offsets. + std::pair<unsigned, int>(PPC::F31, -8), + std::pair<unsigned, int>(PPC::F30, -16), + std::pair<unsigned, int>(PPC::F29, -24), + std::pair<unsigned, int>(PPC::F28, -32), + std::pair<unsigned, int>(PPC::F27, -40), + std::pair<unsigned, int>(PPC::F26, -48), + std::pair<unsigned, int>(PPC::F25, -56), + std::pair<unsigned, int>(PPC::F24, -64), + std::pair<unsigned, int>(PPC::F23, -72), + std::pair<unsigned, int>(PPC::F22, -80), + std::pair<unsigned, int>(PPC::F21, -88), + std::pair<unsigned, int>(PPC::F20, -96), + std::pair<unsigned, int>(PPC::F19, -104), + std::pair<unsigned, int>(PPC::F18, -112), + std::pair<unsigned, int>(PPC::F17, -120), + std::pair<unsigned, int>(PPC::F16, -128), + std::pair<unsigned, int>(PPC::F15, -136), + std::pair<unsigned, int>(PPC::F14, -144), + + // General register save area offsets. + std::pair<unsigned, int>(PPC::R31, -4), + std::pair<unsigned, int>(PPC::R30, -8), + std::pair<unsigned, int>(PPC::R29, -12), + std::pair<unsigned, int>(PPC::R28, -16), + std::pair<unsigned, int>(PPC::R27, -20), + std::pair<unsigned, int>(PPC::R26, -24), + std::pair<unsigned, int>(PPC::R25, -28), + std::pair<unsigned, int>(PPC::R24, -32), + std::pair<unsigned, int>(PPC::R23, -36), + std::pair<unsigned, int>(PPC::R22, -40), + std::pair<unsigned, int>(PPC::R21, -44), + std::pair<unsigned, int>(PPC::R20, -48), + std::pair<unsigned, int>(PPC::R19, -52), + std::pair<unsigned, int>(PPC::R18, -56), + std::pair<unsigned, int>(PPC::R17, -60), + std::pair<unsigned, int>(PPC::R16, -64), + std::pair<unsigned, int>(PPC::R15, -68), + std::pair<unsigned, int>(PPC::R14, -72), + + // CR save area offset. + // FIXME SVR4: Disable CR save area for now. +// std::pair<unsigned, int>(PPC::CR2, -4), +// std::pair<unsigned, int>(PPC::CR3, -4), +// std::pair<unsigned, int>(PPC::CR4, -4), +// std::pair<unsigned, int>(PPC::CR2LT, -4), +// std::pair<unsigned, int>(PPC::CR2GT, -4), +// std::pair<unsigned, int>(PPC::CR2EQ, -4), +// std::pair<unsigned, int>(PPC::CR2UN, -4), +// std::pair<unsigned, int>(PPC::CR3LT, -4), +// std::pair<unsigned, int>(PPC::CR3GT, -4), +// std::pair<unsigned, int>(PPC::CR3EQ, -4), +// std::pair<unsigned, int>(PPC::CR3UN, -4), +// std::pair<unsigned, int>(PPC::CR4LT, -4), +// std::pair<unsigned, int>(PPC::CR4GT, -4), +// std::pair<unsigned, int>(PPC::CR4EQ, -4), +// std::pair<unsigned, int>(PPC::CR4UN, -4), + + // VRSAVE save area offset. + std::pair<unsigned, int>(PPC::VRSAVE, -4), + + // Vector register save area + std::pair<unsigned, int>(PPC::V31, -16), + std::pair<unsigned, int>(PPC::V30, -32), + std::pair<unsigned, int>(PPC::V29, -48), + std::pair<unsigned, int>(PPC::V28, -64), + std::pair<unsigned, int>(PPC::V27, -80), + std::pair<unsigned, int>(PPC::V26, -96), + std::pair<unsigned, int>(PPC::V25, -112), + std::pair<unsigned, int>(PPC::V24, -128), + std::pair<unsigned, int>(PPC::V23, -144), + std::pair<unsigned, int>(PPC::V22, -160), + std::pair<unsigned, int>(PPC::V21, -176), + std::pair<unsigned, int>(PPC::V20, -192) + }; + + NumEntries = array_lengthof(Offsets); + + return Offsets; } - }; } // End llvm namespace diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index e7658fc..ec3e757 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -159,7 +159,7 @@ getHazardType(SUnit *SU) { } // Do not allow MTCTR and BCTRL to be in the same dispatch group. - if (HasCTRSet && (Opcode == PPC::BCTRL_Macho || Opcode == PPC::BCTRL_ELF)) + if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4)) return NoopHazard; // If this is a load following a store, make sure it's not to the same or diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 87f8fb0b4..1c6b287 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -35,6 +35,21 @@ #include "llvm/DerivedTypes.h" using namespace llvm; +static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); + static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc", cl::desc("enable preincrement load/store generation on PPC (experimental)"), cl::Hidden); @@ -190,8 +205,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - // VAARG is custom lowered with ELF 32 ABI - if (TM.getSubtarget<PPCSubtarget>().isELF32_ABI()) + // VAARG is custom lowered with the SVR4 ABI + if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) setOperationAction(ISD::VAARG, MVT::Other, Custom); else setOperationAction(ISD::VAARG, MVT::Other, Expand); @@ -380,7 +395,7 @@ unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const { // Darwin passes everything on 4 byte boundary. if (TM.getSubtarget<PPCSubtarget>().isDarwin()) return 4; - // FIXME Elf TBD + // FIXME SVR4 TBD return 4; } @@ -404,11 +419,11 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::SHL: return "PPCISD::SHL"; case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32"; case PPCISD::STD_32: return "PPCISD::STD_32"; - case PPCISD::CALL_ELF: return "PPCISD::CALL_ELF"; - case PPCISD::CALL_Macho: return "PPCISD::CALL_Macho"; + case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4"; + case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; - case PPCISD::BCTRL_Macho: return "PPCISD::BCTRL_Macho"; - case PPCISD::BCTRL_ELF: return "PPCISD::BCTRL_ELF"; + case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin"; + case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4"; case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; case PPCISD::MFCR: return "PPCISD::MFCR"; case PPCISD::VCMP: return "PPCISD::VCMP"; @@ -428,11 +443,17 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { } } - MVT PPCTargetLowering::getSetCCResultType(MVT VT) const { return MVT::i32; } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned PPCTargetLowering::getFunctionAlignment(const Function *F) const { + if (getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin()) + return F->hasFnAttr(Attribute::OptimizeForSize) ? 2 : 4; + else + return 2; +} //===----------------------------------------------------------------------===// // Node matching predicates, for use by the tblgen matching code. @@ -1228,7 +1249,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, unsigned VarArgsNumFPR, const PPCSubtarget &Subtarget) { - assert(0 && "VAARG in ELF32 ABI not implemented yet!"); + assert(0 && "VAARG not yet implemented for the SVR4 ABI!"); return SDValue(); // Not reached } @@ -1261,7 +1282,7 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) { // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) std::pair<SDValue, SDValue> CallResult = LowerCallTo(Chain, Op.getValueType().getTypeForMVT(), false, false, - false, false, CallingConv::C, false, + false, false, 0, CallingConv::C, false, DAG.getExternalSymbol("__trampoline_setup", PtrVT), Args, DAG, dl); @@ -1279,7 +1300,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) { DebugLoc dl = Op.getDebugLoc(); - if (Subtarget.isMachoABI()) { + if (Subtarget.isDarwinABI()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -1288,7 +1309,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); } - // For ELF 32 ABI we follow the layout of the va_list struct. + // For the SVR4 ABI we follow the layout of the va_list struct. // We suppose the given va_list is already allocated. // // typedef struct { @@ -1313,8 +1334,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, // } va_list[1]; - SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i8); - SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i8); + SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i32); + SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i32); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -1334,15 +1355,15 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); // Store first byte : number of int regs - SDValue firstStore = DAG.getStore(Op.getOperand(0), dl, ArgGPR, - Op.getOperand(1), SV, 0); + SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, + Op.getOperand(1), SV, 0, MVT::i8); uint64_t nextOffset = FPROffset; SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), ConstFPROffset); // Store second byte : number of float regs SDValue secondStore = - DAG.getStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset); + DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8); nextOffset += StackOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); @@ -1359,10 +1380,71 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, #include "PPCGenCallingConv.inc" +static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + return true; +} + +static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned ArgRegs[] = { + PPC::R3, PPC::R4, PPC::R5, PPC::R6, + PPC::R7, PPC::R8, PPC::R9, PPC::R10, + }; + const unsigned NumArgRegs = array_lengthof(ArgRegs); + + unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); + + // Skip one register if the first unallocated register has an even register + // number and there are still argument registers available which have not been + // allocated yet. RegNum is actually an index into ArgRegs, which means we + // need to skip a register if RegNum is odd. + if (RegNum != NumArgRegs && RegNum % 2 == 1) { + State.AllocateReg(ArgRegs[RegNum]); + } + + // Always return false here, as this function only makes sure that the first + // unallocated register has an odd register number and does not actually + // allocate a register for the current argument. + return false; +} + +static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned ArgRegs[] = { + PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8 + }; + + const unsigned NumArgRegs = array_lengthof(ArgRegs); + + unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); + + // If there is only one Floating-point register left we need to put both f64 + // values of a split ppc_fp128 value on the stack. + if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { + State.AllocateReg(ArgRegs[RegNum]); + } + + // Always return false here, as this function only makes sure that the two f64 + // values a ppc_fp128 value is split into are both passed in registers or both + // passed on the stack and does not actually allocate a register for the + // current argument. + return false; +} + /// GetFPR - Get the set of FP registers that should be allocated for arguments, /// depending on which subtarget is selected. static const unsigned *GetFPR(const PPCSubtarget &Subtarget) { - if (Subtarget.isMachoABI()) { + if (Subtarget.isDarwinABI()) { static const unsigned FPR[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 @@ -1381,9 +1463,9 @@ static const unsigned *GetFPR(const PPCSubtarget &Subtarget) { /// CalculateStackSlotSize - Calculates the size reserved for this argument on /// the stack. static unsigned CalculateStackSlotSize(SDValue Arg, ISD::ArgFlagsTy Flags, - bool isVarArg, unsigned PtrByteSize) { + unsigned PtrByteSize) { MVT ArgVT = Arg.getValueType(); - unsigned ArgSize =ArgVT.getSizeInBits()/8; + unsigned ArgSize = ArgVT.getSizeInBits()/8; if (Flags.isByVal()) ArgSize = Flags.getByValSize(); ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; @@ -1392,18 +1474,248 @@ static unsigned CalculateStackSlotSize(SDValue Arg, ISD::ArgFlagsTy Flags, } SDValue -PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, - SelectionDAG &DAG, - int &VarArgsFrameIndex, - int &VarArgsStackOffset, - unsigned &VarArgsNumGPR, - unsigned &VarArgsNumFPR, - const PPCSubtarget &Subtarget) { +PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4(SDValue Op, + SelectionDAG &DAG, + int &VarArgsFrameIndex, + int &VarArgsStackOffset, + unsigned &VarArgsNumGPR, + unsigned &VarArgsNumFPR, + const PPCSubtarget &Subtarget) { + // SVR4 ABI Stack Frame Layout: + // +-----------------------------------+ + // +--> | Back chain | + // | +-----------------------------------+ + // | | Floating-point register save area | + // | +-----------------------------------+ + // | | General register save area | + // | +-----------------------------------+ + // | | CR save word | + // | +-----------------------------------+ + // | | VRSAVE save word | + // | +-----------------------------------+ + // | | Alignment padding | + // | +-----------------------------------+ + // | | Vector register save area | + // | +-----------------------------------+ + // | | Local variable space | + // | +-----------------------------------+ + // | | Parameter list area | + // | +-----------------------------------+ + // | | LR save word | + // | +-----------------------------------+ + // SP--> +--- | Back chain | + // +-----------------------------------+ + // + // Specifications: + // System V Application Binary Interface PowerPC Processor Supplement + // AltiVec Technology Programming Interface Manual + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + SmallVector<SDValue, 8> ArgValues; + SDValue Root = Op.getOperand(0); + bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0; + DebugLoc dl = Op.getDebugLoc(); + + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + // Potential tail calls could cause overwriting of argument stack slots. + unsigned CC = MF.getFunction()->getCallingConv(); + bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast)); + unsigned PtrByteSize = 4; + + // Assign locations to all of the incoming arguments. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + + // Reserve space for the linkage area on the stack. + CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize); + + CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_PPC_SVR4); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + + // Arguments stored in registers. + if (VA.isRegLoc()) { + TargetRegisterClass *RC; + MVT ValVT = VA.getValVT(); + + switch (ValVT.getSimpleVT()) { + default: + assert(0 && "ValVT not supported by FORMAL_ARGUMENTS Lowering"); + case MVT::i32: + RC = PPC::GPRCRegisterClass; + break; + case MVT::f32: + RC = PPC::F4RCRegisterClass; + break; + case MVT::f64: + RC = PPC::F8RCRegisterClass; + break; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v4f32: + RC = PPC::VRRCRegisterClass; + break; + } + + // Transform the arguments stored in physical registers into virtual ones. + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, ValVT); + + ArgValues.push_back(ArgValue); + } else { + // Argument stored in memory. + assert(VA.isMemLoc()); + + unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; + int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), + isImmutable); + + // Create load nodes to retrieve arguments from the stack. + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0)); + } + } + + // Assign locations to all of the incoming aggregate by value arguments. + // Aggregates passed by value are stored in the local variable space of the + // caller's stack frame, right above the parameter list area. + SmallVector<CCValAssign, 16> ByValArgLocs; + CCState CCByValInfo(CC, isVarArg, getTargetMachine(), ByValArgLocs); + + // Reserve stack space for the allocations in CCInfo. + CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); + + CCByValInfo.AnalyzeFormalArguments(Op.getNode(), CC_PPC_SVR4_ByVal); + + // Area that is at least reserved in the caller of this function. + unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); + + // Set the size that is at least reserved in caller of this function. Tail + // call optimized function's reserved stack space needs to be aligned so that + // taking the difference between two stack areas will result in an aligned + // stack. + PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + + MinReservedArea = + std::max(MinReservedArea, + PPCFrameInfo::getMinCallFrameSize(false, false)); + + unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()-> + getStackAlignment(); + unsigned AlignMask = TargetAlign-1; + MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; + + FI->setMinReservedArea(MinReservedArea); + + SmallVector<SDValue, 8> MemOps; + + // If the function takes variable number of arguments, make a frame index for + // the start of the first vararg value... for expansion of llvm.va_start. + if (isVarArg) { + static const unsigned GPArgRegs[] = { + PPC::R3, PPC::R4, PPC::R5, PPC::R6, + PPC::R7, PPC::R8, PPC::R9, PPC::R10, + }; + const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); + + static const unsigned FPArgRegs[] = { + PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8 + }; + const unsigned NumFPArgRegs = array_lengthof(FPArgRegs); + + VarArgsNumGPR = CCInfo.getFirstUnallocated(GPArgRegs, NumGPArgRegs); + VarArgsNumFPR = CCInfo.getFirstUnallocated(FPArgRegs, NumFPArgRegs); + + // Make room for NumGPArgRegs and NumFPArgRegs. + int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + + NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8; + + VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, + CCInfo.getNextStackOffset()); + + VarArgsFrameIndex = MFI->CreateStackObject(Depth, 8); + SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + + // The fixed integer arguments of a variadic function are + // stored to the VarArgsFrameIndex on the stack. + unsigned GPRIndex = 0; + for (; GPRIndex != VarArgsNumGPR; ++GPRIndex) { + SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT); + SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0); + MemOps.push_back(Store); + // Increment the address by four for the next argument to store + SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); + } + + // If this function is vararg, store any remaining integer argument regs + // to their spots on the stack so that they may be loaded by deferencing the + // result of va_next. + for (; GPRIndex != NumGPArgRegs; ++GPRIndex) { + unsigned VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); + + SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + MemOps.push_back(Store); + // Increment the address by four for the next argument to store + SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); + } + + // FIXME SVR4: We only need to save FP argument registers if CR bit 6 is + // set. + + // The double arguments are stored to the VarArgsFrameIndex + // on the stack. + unsigned FPRIndex = 0; + for (FPRIndex = 0; FPRIndex != VarArgsNumFPR; ++FPRIndex) { + SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64); + SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0); + MemOps.push_back(Store); + // Increment the address by eight for the next argument to store + SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, + PtrVT); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); + } + + for (; FPRIndex != NumFPArgRegs; ++FPRIndex) { + unsigned VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); + + SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::f64); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + MemOps.push_back(Store); + // Increment the address by eight for the next argument to store + SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, + PtrVT); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); + } + } + + if (!MemOps.empty()) + Root = DAG.getNode(ISD::TokenFactor, dl, + MVT::Other, &MemOps[0], MemOps.size()); + + + ArgValues.push_back(Root); + + // Return the new list of results. + return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(), + &ArgValues[0], ArgValues.size()).getValue(Op.getResNo()); +} + +SDValue +PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op, + SelectionDAG &DAG, + int &VarArgsFrameIndex, + const PPCSubtarget &Subtarget) { // TODO: add description of PPC stack frame format, or at least some docs. // MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); SmallVector<SDValue, 8> ArgValues; SDValue Root = Op.getOperand(0); bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0; @@ -1411,14 +1723,12 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; - bool isMachoABI = Subtarget.isMachoABI(); - bool isELF32_ABI = Subtarget.isELF32_ABI(); // Potential tail calls could cause overwriting of argument stack slots. unsigned CC = MF.getFunction()->getCallingConv(); bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast)); unsigned PtrByteSize = isPPC64 ? 8 : 4; - unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI); + unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true); // Area that is at least reserved in caller of this function. unsigned MinReservedArea = ArgOffset; @@ -1439,7 +1749,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, }; const unsigned Num_GPR_Regs = array_lengthof(GPR_32); - const unsigned Num_FPR_Regs = isMachoABI ? 13 : 8; + const unsigned Num_FPR_Regs = 13; const unsigned Num_VR_Regs = array_lengthof( VR); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; @@ -1453,8 +1763,6 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, // that out...for the pathological case, compute VecArgOffset as the // start of the vector parameter area. Computing VecArgOffset is the // entire point of the following loop. - // Altivec is not mentioned in the ppc32 Elf Supplement, so I'm not trying - // to handle Elf here. unsigned VecArgOffset = ArgOffset; if (!isVarArg && !isPPC64) { for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e; @@ -1500,10 +1808,6 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, // although the first ones are often in registers. - // - // In the ELF 32 ABI, GPRs and stack are double word align: an argument - // represented with two words (long long or double) must be copied to an - // even GPR_idx value or to an even ArgOffset value. SmallVector<SDValue, 8> MemOps; unsigned nAltivecParamsAtEnd = 0; @@ -1516,8 +1820,6 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags(); - // See if next argument requires stack alignment in ELF - bool Align = Flags.isSplit(); unsigned CurArgOffset = ArgOffset; @@ -1528,25 +1830,20 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, MinReservedArea = ((MinReservedArea+15)/16)*16; MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo), Flags, - isVarArg, PtrByteSize); } else nAltivecParamsAtEnd++; } else // Calculate min reserved area. MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo), Flags, - isVarArg, PtrByteSize); - // FIXME alignment for ELF may not be right // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. if (Flags.isByVal()) { // ObjSize is the true size, ArgSize rounded up to multiple of registers. ObjSize = Flags.getByValSize(); ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; - // Double word align in ELF - if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2); // Objects of size 1 and 2 are right justified, everything else is // left justified. This means the memory address is adjusted forwards. if (ObjSize==1 || ObjSize==2) { @@ -1558,17 +1855,16 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, ArgValues.push_back(FIN); if (ObjSize==1 || ObjSize==2) { if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); - RegInfo.addLiveIn(GPR[GPR_idx], VReg); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT); SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 ); MemOps.push_back(Store); ++GPR_idx; - if (isMachoABI) ArgOffset += PtrByteSize; - } else { - ArgOffset += PtrByteSize; } + + ArgOffset += PtrByteSize; + continue; } for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { @@ -1576,15 +1872,14 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, // to memory. ArgVal will be address of the beginning of // the object. if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); - RegInfo.addLiveIn(GPR[GPR_idx], VReg); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); MemOps.push_back(Store); ++GPR_idx; - if (isMachoABI) ArgOffset += PtrByteSize; + ArgOffset += PtrByteSize; } else { ArgOffset += ArgSize - (ArgOffset-CurArgOffset); break; @@ -1597,30 +1892,22 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, default: assert(0 && "Unhandled argument type!"); case MVT::i32: if (!isPPC64) { - // Double word align in ELF - if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2); - if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); - RegInfo.addLiveIn(GPR[GPR_idx], VReg); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); ++GPR_idx; } else { needsLoad = true; ArgSize = PtrByteSize; } - // Stack align in ELF - if (needsLoad && Align && isELF32_ABI) - ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; - // All int arguments reserve stack space in Macho ABI. - if (isMachoABI || needsLoad) ArgOffset += PtrByteSize; + // All int arguments reserve stack space in the Darwin ABI. + ArgOffset += PtrByteSize; break; } // FALLTHROUGH case MVT::i64: // PPC64 if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); - RegInfo.addLiveIn(GPR[GPR_idx], VReg); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32) { @@ -1641,37 +1928,35 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, needsLoad = true; ArgSize = PtrByteSize; } - // All int arguments reserve stack space in Macho ABI. - if (isMachoABI || needsLoad) ArgOffset += 8; + // All int arguments reserve stack space in the Darwin ABI. + ArgOffset += 8; break; case MVT::f32: case MVT::f64: // Every 4 bytes of argument space consumes one of the GPRs available for // argument passing. - if (GPR_idx != Num_GPR_Regs && isMachoABI) { + if (GPR_idx != Num_GPR_Regs) { ++GPR_idx; if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) ++GPR_idx; } if (FPR_idx != Num_FPR_Regs) { unsigned VReg; + if (ObjectVT == MVT::f32) - VReg = RegInfo.createVirtualRegister(&PPC::F4RCRegClass); + VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); else - VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); - RegInfo.addLiveIn(FPR[FPR_idx], VReg); + VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); + ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT); ++FPR_idx; } else { needsLoad = true; } - // Stack align in ELF - if (needsLoad && Align && isELF32_ABI) - ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; - // All FP arguments reserve stack space in Macho ABI. - if (isMachoABI || needsLoad) ArgOffset += isPPC64 ? 8 : ObjSize; + // All FP arguments reserve stack space in the Darwin ABI. + ArgOffset += isPPC64 ? 8 : ObjSize; break; case MVT::v4f32: case MVT::v4i32: @@ -1680,8 +1965,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, // Note that vector arguments in registers don't reserve stack space, // except in varargs functions. if (VR_idx != Num_VR_Regs) { - unsigned VReg = RegInfo.createVirtualRegister(&PPC::VRRCRegClass); - RegInfo.addLiveIn(VR[VR_idx], VReg); + unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT); if (isVarArg) { while ((ArgOffset % 16) != 0) { @@ -1734,7 +2018,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, } MinReservedArea = std::max(MinReservedArea, - PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI)); + PPCFrameInfo::getMinCallFrameSize(isPPC64, true)); unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()-> getStackAlignment(); unsigned AlignMask = TargetAlign-1; @@ -1744,53 +2028,23 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { - - int depth; - if (isELF32_ABI) { - VarArgsNumGPR = GPR_idx; - VarArgsNumFPR = FPR_idx; - - // Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame - // pointer. - depth = -(Num_GPR_Regs * PtrVT.getSizeInBits()/8 + - Num_FPR_Regs * MVT(MVT::f64).getSizeInBits()/8 + - PtrVT.getSizeInBits()/8); - - VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - ArgOffset); - - } - else - depth = ArgOffset; + int Depth = ArgOffset; VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - depth); + Depth); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); - // In ELF 32 ABI, the fixed integer arguments of a variadic function are - // stored to the VarArgsFrameIndex on the stack. - if (isELF32_ABI) { - for (GPR_idx = 0; GPR_idx != VarArgsNumGPR; ++GPR_idx) { - SDValue Val = DAG.getRegister(GPR[GPR_idx], PtrVT); - SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0); - MemOps.push_back(Store); - // Increment the address by four for the next argument to store - SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); - FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); - } - } - // If this function is vararg, store any remaining integer argument regs // to their spots on the stack so that they may be loaded by deferencing the // result of va_next. for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { unsigned VReg; + if (isPPC64) - VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); else - VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); - RegInfo.addLiveIn(GPR[GPR_idx], VReg); SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); MemOps.push_back(Store); @@ -1798,34 +2052,6 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } - - // In ELF 32 ABI, the double arguments are stored to the VarArgsFrameIndex - // on the stack. - if (isELF32_ABI) { - for (FPR_idx = 0; FPR_idx != VarArgsNumFPR; ++FPR_idx) { - SDValue Val = DAG.getRegister(FPR[FPR_idx], MVT::f64); - SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0); - MemOps.push_back(Store); - // Increment the address by eight for the next argument to store - SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, - PtrVT); - FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); - } - - for (; FPR_idx != Num_FPR_Regs; ++FPR_idx) { - unsigned VReg; - VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); - - RegInfo.addLiveIn(FPR[FPR_idx], VReg); - SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::f64); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); - MemOps.push_back(Store); - // Increment the address by eight for the next argument to store - SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, - PtrVT); - FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); - } - } } if (!MemOps.empty()) @@ -1840,11 +2066,10 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, } /// CalculateParameterAndLinkageAreaSize - Get the size of the paramter plus -/// linkage area. +/// linkage area for the Darwin ABI. static unsigned CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, bool isPPC64, - bool isMachoABI, bool isVarArg, unsigned CC, CallSDNode *TheCall, @@ -1852,7 +2077,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. We start with 24/48 bytes, which is // prereserved space for [SP][CR][LR][3 x unused]. - unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI); + unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, true); unsigned NumOps = TheCall->getNumArgs(); unsigned PtrByteSize = isPPC64 ? 8 : 4; @@ -1879,7 +2104,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. NumBytes = ((NumBytes+15)/16)*16; } - NumBytes += CalculateStackSlotSize(Arg, Flags, isVarArg, PtrByteSize); + NumBytes += CalculateStackSlotSize(Arg, Flags, PtrByteSize); } // Allow for Altivec parameters at the end, if needed. @@ -1894,7 +2119,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. NumBytes = std::max(NumBytes, - PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI)); + PPCFrameInfo::getMinCallFrameSize(isPPC64, true)); // Tail call needs the stack to be aligned. if (CC==CallingConv::Fast && PerformTailCallOpt) { @@ -2017,26 +2242,30 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue OldFP, int SPDiff, bool isPPC64, - bool isMachoABI, + bool isDarwinABI, DebugLoc dl) { if (SPDiff) { // Calculate the new stack slot for the return address. int SlotSize = isPPC64 ? 8 : 4; int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64, - isMachoABI); + isDarwinABI); int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewRetAddrLoc); - int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, - isMachoABI); - int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc); - MVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, PseudoSourceValue::getFixedStack(NewRetAddr), 0); - SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); - Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, - PseudoSourceValue::getFixedStack(NewFPIdx), 0); + + // When using the SVR4 ABI there is no need to move the FP stack slot + // as the FP is never overwritten. + if (isDarwinABI) { + int NewFPLoc = + SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); + int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc); + SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); + Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, + PseudoSourceValue::getFixedStack(NewFPIdx), 0); + } } return Chain; } @@ -2067,6 +2296,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, SDValue Chain, SDValue &LROpOut, SDValue &FPOpOut, + bool isDarwinABI, DebugLoc dl) { if (SPDiff) { // Load the LR and FP stack slot for later adjusting. @@ -2074,9 +2304,14 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, LROpOut = getReturnAddrFrameIndex(DAG); LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0); Chain = SDValue(LROpOut.getNode(), 1); - FPOpOut = getFramePointerFrameIndex(DAG); - FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0); - Chain = SDValue(FPOpOut.getNode(), 1); + + // When using the SVR4 ABI there is no need to load the FP stack slot + // as the FP is never overwritten. + if (isDarwinABI) { + FPOpOut = getFramePointerFrameIndex(DAG); + FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0); + Chain = SDValue(FPOpOut.getNode(), 1); + } } return Chain; } @@ -2090,8 +2325,8 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, - unsigned Size, DebugLoc dl) { - SDValue SizeNode = DAG.getConstant(Size, MVT::i32); + DebugLoc dl) { + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), false, NULL, 0, NULL, 0); } @@ -2122,21 +2357,387 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, TailCallArguments); } -SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget, - TargetMachine &TM) { +static +void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, + DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, + SDValue LROp, SDValue FPOp, bool isDarwinABI, + SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) { + MachineFunction &MF = DAG.getMachineFunction(); + + // Emit a sequence of copyto/copyfrom virtual registers for arguments that + // might overwrite each other in case of tail call optimization. + SmallVector<SDValue, 8> MemOpChains2; + // Do not flag preceeding copytoreg stuff together with the following stuff. + InFlag = SDValue(); + StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, + MemOpChains2, dl); + if (!MemOpChains2.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains2[0], MemOpChains2.size()); + + // Store the return address to the appropriate stack slot. + Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, + isPPC64, isDarwinABI, dl); + + // Emit callseq_end just before tailcall node. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); + InFlag = Chain.getValue(1); +} + +static +unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, + SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall, + SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, + SmallVector<SDValue, 8> &Ops, std::vector<MVT> &NodeTys, + bool isSVR4ABI) { + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + NodeTys.push_back(MVT::Other); // Returns a chain + NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. + + unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin; + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType()); + else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType()); + else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) + // If this is an absolute destination address, use the munged value. + Callee = SDValue(Dest, 0); + else { + // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair + // to do the call, we can't use PPCISD::CALL. + SDValue MTCTROps[] = {Chain, Callee, InFlag}; + Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps, + 2 + (InFlag.getNode() != 0)); + InFlag = Chain.getValue(1); + + NodeTys.clear(); + NodeTys.push_back(MVT::Other); + NodeTys.push_back(MVT::Flag); + Ops.push_back(Chain); + CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin; + Callee.setNode(0); + // Add CTR register as callee so a bctr can be emitted later. + if (isTailCall) + Ops.push_back(DAG.getRegister(PPC::CTR, PtrVT)); + } + + // If this is a direct call, pass the chain and the callee. + if (Callee.getNode()) { + Ops.push_back(Chain); + Ops.push_back(Callee); + } + // If this is a tail call add stack pointer delta. + if (isTailCall) + Ops.push_back(DAG.getConstant(SPDiff, MVT::i32)); + + // Add argument registers to the end of the list so that they are known live + // into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + return CallOpc; +} + +static SDValue LowerCallReturn(SDValue Op, SelectionDAG &DAG, TargetMachine &TM, + CallSDNode *TheCall, SDValue Chain, + SDValue InFlag) { + bool isVarArg = TheCall->isVarArg(); + DebugLoc dl = TheCall->getDebugLoc(); + SmallVector<SDValue, 16> ResultVals; + SmallVector<CCValAssign, 16> RVLocs; + unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv(); + CCState CCRetInfo(CallerCC, isVarArg, TM, RVLocs); + CCRetInfo.AnalyzeCallResult(TheCall, RetCC_PPC); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + CCValAssign &VA = RVLocs[i]; + MVT VT = VA.getValVT(); + assert(VA.isRegLoc() && "Can only return in registers!"); + Chain = DAG.getCopyFromReg(Chain, dl, + VA.getLocReg(), VT, InFlag).getValue(1); + ResultVals.push_back(Chain.getValue(0)); + InFlag = Chain.getValue(2); + } + + // If the function returns void, just return the chain. + if (RVLocs.empty()) + return Chain; + + // Otherwise, merge everything together with a MERGE_VALUES node. + ResultVals.push_back(Chain); + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(), + &ResultVals[0], ResultVals.size()); + return Res.getValue(Op.getResNo()); +} + +static +SDValue FinishCall(SelectionDAG &DAG, CallSDNode *TheCall, TargetMachine &TM, + SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, + SDValue Op, SDValue InFlag, SDValue Chain, SDValue &Callee, + int SPDiff, unsigned NumBytes) { + unsigned CC = TheCall->getCallingConv(); + DebugLoc dl = TheCall->getDebugLoc(); + bool isTailCall = TheCall->isTailCall() + && CC == CallingConv::Fast && PerformTailCallOpt; + + std::vector<MVT> NodeTys; + SmallVector<SDValue, 8> Ops; + unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, + isTailCall, RegsToPass, Ops, NodeTys, + TM.getSubtarget<PPCSubtarget>().isSVR4ABI()); + + // When performing tail call optimization the callee pops its arguments off + // the stack. Account for this here so these bytes can be pushed back on in + // PPCRegisterInfo::eliminateCallFramePseudoInstr. + int BytesCalleePops = + (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0; + + if (InFlag.getNode()) + Ops.push_back(InFlag); + + // Emit tail call. + if (isTailCall) { + assert(InFlag.getNode() && + "Flag must be set. Depend on flag being set in LowerRET"); + Chain = DAG.getNode(PPCISD::TAILCALL, dl, + TheCall->getVTList(), &Ops[0], Ops.size()); + return SDValue(Chain.getNode(), Op.getResNo()); + } + + Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(BytesCalleePops, true), + InFlag); + if (TheCall->getValueType(0) != MVT::Other) + InFlag = Chain.getValue(1); + + return LowerCallReturn(Op, DAG, TM, TheCall, Chain, InFlag); +} + +SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget, + TargetMachine &TM) { + // See PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4() for a description + // of the SVR4 ABI stack frame layout. CallSDNode *TheCall = cast<CallSDNode>(Op.getNode()); SDValue Chain = TheCall->getChain(); bool isVarArg = TheCall->isVarArg(); unsigned CC = TheCall->getCallingConv(); + assert((CC == CallingConv::C || + CC == CallingConv::Fast) && "Unknown calling convention!"); bool isTailCall = TheCall->isTailCall() && CC == CallingConv::Fast && PerformTailCallOpt; SDValue Callee = TheCall->getCallee(); - unsigned NumOps = TheCall->getNumArgs(); DebugLoc dl = TheCall->getDebugLoc(); - bool isMachoABI = Subtarget.isMachoABI(); - bool isELF32_ABI = Subtarget.isELF32_ABI(); + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + unsigned PtrByteSize = 4; + + MachineFunction &MF = DAG.getMachineFunction(); + + // Mark this function as potentially containing a function that contains a + // tail call. As a consequence the frame pointer will be used for dynamicalloc + // and restoring the callers stack pointer in this functions epilog. This is + // done because by tail calling the called function might overwrite the value + // in this function's (MF) stack pointer stack slot 0(SP). + if (PerformTailCallOpt && CC==CallingConv::Fast) + MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); + + // Count how many bytes are to be pushed on the stack, including the linkage + // area, parameter list area and the part of the local variable space which + // contains copies of aggregates which are passed by value. + + // Assign locations to all of the outgoing arguments. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + + // Reserve space for the linkage area on the stack. + CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize); + + if (isVarArg) { + // Handle fixed and variable vector arguments differently. + // Fixed vector arguments go into registers as long as registers are + // available. Variable vector arguments always go into memory. + unsigned NumArgs = TheCall->getNumArgs(); + unsigned NumFixedArgs = TheCall->getNumFixedArgs(); + + for (unsigned i = 0; i != NumArgs; ++i) { + MVT ArgVT = TheCall->getArg(i).getValueType(); + ISD::ArgFlagsTy ArgFlags = TheCall->getArgFlags(i); + bool Result; + + if (i < NumFixedArgs) { + Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, + CCInfo); + } else { + Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, + ArgFlags, CCInfo); + } + + if (Result) { + cerr << "Call operand #" << i << " has unhandled type " + << ArgVT.getMVTString() << "\n"; + abort(); + } + } + } else { + // All arguments are treated the same. + CCInfo.AnalyzeCallOperands(TheCall, CC_PPC_SVR4); + } + + // Assign locations to all of the outgoing aggregate by value arguments. + SmallVector<CCValAssign, 16> ByValArgLocs; + CCState CCByValInfo(CC, isVarArg, getTargetMachine(), ByValArgLocs); + + // Reserve stack space for the allocations in CCInfo. + CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); + + CCByValInfo.AnalyzeCallOperands(TheCall, CC_PPC_SVR4_ByVal); + + // Size of the linkage area, parameter list area and the part of the local + // space variable where copies of aggregates which are passed by value are + // stored. + unsigned NumBytes = CCByValInfo.getNextStackOffset(); + + // Calculate by how many bytes the stack has to be adjusted in case of tail + // call optimization. + int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); + + // Adjust the stack pointer for the new arguments... + // These operations are automatically eliminated by the prolog/epilog pass + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + SDValue CallSeqStart = Chain; + + // Load the return address and frame pointer so it can be moved somewhere else + // later. + SDValue LROp, FPOp; + Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false, + dl); + + // Set up a copy of the stack pointer for use loading and storing any + // arguments that may not fit in the registers available for argument + // passing. + SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); + + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + SmallVector<TailCallArgumentInfo, 8> TailCallArguments; + SmallVector<SDValue, 8> MemOpChains; + + // Walk the register/memloc assignments, inserting copies/loads. + for (unsigned i = 0, j = 0, e = ArgLocs.size(); + i != e; + ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = TheCall->getArg(i); + ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i); + + if (Flags.isByVal()) { + // Argument is an aggregate which is passed by value, thus we need to + // create a copy of it in the local variable space of the current stack + // frame (which is the stack frame of the caller) and pass the address of + // this copy to the callee. + assert((j < ByValArgLocs.size()) && "Index out of bounds!"); + CCValAssign &ByValVA = ByValArgLocs[j++]; + assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); + + // Memory reserved in the local variable space of the callers stack frame. + unsigned LocMemOffset = ByValVA.getLocMemOffset(); + + SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + + // Create a copy of the argument in the local area of the current + // stack frame. + SDValue MemcpyCall = + CreateCopyOfByValArgument(Arg, PtrOff, + CallSeqStart.getNode()->getOperand(0), + Flags, DAG, dl); + + // This must go outside the CALLSEQ_START..END. + SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, + CallSeqStart.getNode()->getOperand(1)); + DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), + NewCallSeqStart.getNode()); + Chain = CallSeqStart = NewCallSeqStart; + + // Pass the address of the aggregate copy on the stack either in a + // physical register or in the parameter list area of the current stack + // frame to the callee. + Arg = PtrOff; + } + + if (VA.isRegLoc()) { + // Put argument in a physical register. + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else { + // Put argument in the parameter list area of the current stack frame. + assert(VA.isMemLoc()); + unsigned LocMemOffset = VA.getLocMemOffset(); + + if (!isTailCall) { + SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, + PseudoSourceValue::getStack(), LocMemOffset)); + } else { + // Calculate and remember argument location. + CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, + TailCallArguments); + } + } + } + + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into the appropriate regs. + SDValue InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + // Set CR6 to true if this is a vararg call. + if (isVarArg) { + SDValue SetCR(DAG.getTargetNode(PPC::CRSET, dl, MVT::i32), 0); + Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag); + InFlag = Chain.getValue(1); + } + + if (isTailCall) { + PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, + false, TailCallArguments); + } + + return FinishCall(DAG, TheCall, TM, RegsToPass, Op, InFlag, Chain, Callee, + SPDiff, NumBytes); +} + +SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget, + TargetMachine &TM) { + CallSDNode *TheCall = cast<CallSDNode>(Op.getNode()); + SDValue Chain = TheCall->getChain(); + bool isVarArg = TheCall->isVarArg(); + unsigned CC = TheCall->getCallingConv(); + bool isTailCall = TheCall->isTailCall() + && CC == CallingConv::Fast && PerformTailCallOpt; + SDValue Callee = TheCall->getCallee(); + unsigned NumOps = TheCall->getNumArgs(); + DebugLoc dl = TheCall->getDebugLoc(); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; @@ -2144,10 +2745,6 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, MachineFunction &MF = DAG.getMachineFunction(); - // args_to_use will accumulate outgoing args for the PPCISD::CALL case in - // SelectExpr to use to put the arguments in the appropriate registers. - std::vector<SDValue> args_to_use; - // Mark this function as potentially containing a function that contains a // tail call. As a consequence the frame pointer will be used for dynamicalloc // and restoring the callers stack pointer in this functions epilog. This is @@ -2162,8 +2759,8 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // area, and parameter passing area. We start with 24/48 bytes, which is // prereserved space for [SP][CR][LR][3 x unused]. unsigned NumBytes = - CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isMachoABI, isVarArg, CC, - TheCall, nAltivecParamsAtEnd); + CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CC, TheCall, + nAltivecParamsAtEnd); // Calculate by how many bytes the stack has to be adjusted in case of tail // call optimization. @@ -2177,7 +2774,8 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // Load the return address and frame pointer so it can be move somewhere else // later. SDValue LROp, FPOp; - Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); + Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, + dl); // Set up a copy of the stack pointer for use loading and storing any // arguments that may not fit in the registers available for argument @@ -2192,7 +2790,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // memory. Also, if this is a vararg function, floating point operations // must be stored to our stack, and loaded into integer regs as well, if // any integer regs are available for argument passing. - unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI); + unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; static const unsigned GPR_32[] = { // 32-bit registers. @@ -2210,12 +2808,12 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; const unsigned NumGPRs = array_lengthof(GPR_32); - const unsigned NumFPRs = isMachoABI ? 13 : 8; - const unsigned NumVRs = array_lengthof( VR); + const unsigned NumFPRs = 13; + const unsigned NumVRs = array_lengthof(VR); const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32; - std::vector<std::pair<unsigned, SDValue> > RegsToPass; + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<TailCallArgumentInfo, 8> TailCallArguments; SmallVector<SDValue, 8> MemOpChains; @@ -2223,19 +2821,12 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, bool inMem = false; SDValue Arg = TheCall->getArg(i); ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i); - // See if next argument requires stack alignment in ELF - bool Align = Flags.isSplit(); // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. SDValue PtrOff; - // Stack align in ELF 32 - if (isELF32_ABI && Align) - PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize, - StackPtr.getValueType()); - else - PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); + PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); @@ -2246,11 +2837,9 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); } - // FIXME Elf untested, what are alignment rules? // FIXME memcpy is used way more than necessary. Correctness first. if (Flags.isByVal()) { unsigned Size = Flags.getByValSize(); - if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2); if (Size==1 || Size==2) { // Very small objects are passed right-justified. // Everything else is passed left-justified. @@ -2260,14 +2849,14 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, NULL, 0, VT); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); - if (isMachoABI) - ArgOffset += PtrByteSize; + + ArgOffset += PtrByteSize; } else { SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType()); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr, CallSeqStart.getNode()->getOperand(0), - Flags, DAG, Size, dl); + Flags, DAG, dl); // This must go outside the CALLSEQ_START..END. SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, CallSeqStart.getNode()->getOperand(1)); @@ -2283,7 +2872,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // registers. (This is not what the doc says.) SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, CallSeqStart.getNode()->getOperand(0), - Flags, DAG, Size, dl); + Flags, DAG, dl); // This must go outside the CALLSEQ_START..END. SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, CallSeqStart.getNode()->getOperand(1)); @@ -2297,8 +2886,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); - if (isMachoABI) - ArgOffset += PtrByteSize; + ArgOffset += PtrByteSize; } else { ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; break; @@ -2311,8 +2899,6 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, default: assert(0 && "Unexpected ValueType for argument!"); case MVT::i32: case MVT::i64: - // Double word align in ELF - if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2); if (GPR_idx != NumGPRs) { RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); } else { @@ -2321,13 +2907,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, TailCallArguments, dl); inMem = true; } - if (inMem || isMachoABI) { - // Stack align in ELF - if (isELF32_ABI && Align) - ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; - - ArgOffset += PtrByteSize; - } + ArgOffset += PtrByteSize; break; case MVT::f32: case MVT::f64: @@ -2342,28 +2922,24 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, if (GPR_idx != NumGPRs) { SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0); MemOpChains.push_back(Load.getValue(1)); - if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], - Load)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0); MemOpChains.push_back(Load.getValue(1)); - if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], - Load)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } } else { // If we have any FPRs remaining, we may also have GPRs remaining. // Args passed in FPRs consume either 1 (f32) or 2 (f64) available // GPRs. - if (isMachoABI) { - if (GPR_idx != NumGPRs) - ++GPR_idx; - if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && - !isPPC64) // PPC64 has 64-bit GPR's obviously :) - ++GPR_idx; - } + if (GPR_idx != NumGPRs) + ++GPR_idx; + if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && + !isPPC64) // PPC64 has 64-bit GPR's obviously :) + ++GPR_idx; } } else { LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, @@ -2371,15 +2947,10 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, TailCallArguments, dl); inMem = true; } - if (inMem || isMachoABI) { - // Stack align in ELF - if (isELF32_ABI && Align) - ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; - if (isPPC64) - ArgOffset += 8; - else - ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; - } + if (isPPC64) + ArgOffset += 8; + else + ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; break; case MVT::v4f32: case MVT::v4i32: @@ -2475,148 +3046,13 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, InFlag = Chain.getValue(1); } - // With the ELF 32 ABI, set CR6 to true if this is a vararg call. - if (isVarArg && isELF32_ABI) { - SDValue SetCR(DAG.getTargetNode(PPC::CRSET, dl, MVT::i32), 0); - Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag); - InFlag = Chain.getValue(1); - } - - // Emit a sequence of copyto/copyfrom virtual registers for arguments that - // might overwrite each other in case of tail call optimization. if (isTailCall) { - SmallVector<SDValue, 8> MemOpChains2; - // Do not flag preceeding copytoreg stuff together with the following stuff. - InFlag = SDValue(); - StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, - MemOpChains2, dl); - if (!MemOpChains2.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains2[0], MemOpChains2.size()); - - // Store the return address to the appropriate stack slot. - Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, - isPPC64, isMachoABI, dl); - } - - // Emit callseq_end just before tailcall node. - if (isTailCall) { - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag); - InFlag = Chain.getValue(1); - } - - std::vector<MVT> NodeTys; - NodeTys.push_back(MVT::Other); // Returns a chain - NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. - - SmallVector<SDValue, 8> Ops; - unsigned CallOpc = isMachoABI? PPCISD::CALL_Macho : PPCISD::CALL_ELF; - - // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every - // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol - // node so that legalize doesn't hack it. - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType()); - else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType()); - else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) - // If this is an absolute destination address, use the munged value. - Callee = SDValue(Dest, 0); - else { - // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair - // to do the call, we can't use PPCISD::CALL. - SDValue MTCTROps[] = {Chain, Callee, InFlag}; - Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps, - 2 + (InFlag.getNode() != 0)); - InFlag = Chain.getValue(1); - - // Copy the callee address into R12/X12 on darwin. - if (isMachoABI) { - unsigned Reg = Callee.getValueType() == MVT::i32 ? PPC::R12 : PPC::X12; - Chain = DAG.getCopyToReg(Chain, dl, Reg, Callee, InFlag); - InFlag = Chain.getValue(1); - } - - NodeTys.clear(); - NodeTys.push_back(MVT::Other); - NodeTys.push_back(MVT::Flag); - Ops.push_back(Chain); - CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF; - Callee.setNode(0); - // Add CTR register as callee so a bctr can be emitted later. - if (isTailCall) - Ops.push_back(DAG.getRegister(PPC::CTR, getPointerTy())); - } - - // If this is a direct call, pass the chain and the callee. - if (Callee.getNode()) { - Ops.push_back(Chain); - Ops.push_back(Callee); + PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, + FPOp, true, TailCallArguments); } - // If this is a tail call add stack pointer delta. - if (isTailCall) - Ops.push_back(DAG.getConstant(SPDiff, MVT::i32)); - // Add argument registers to the end of the list so that they are known live - // into the call. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - - // When performing tail call optimization the callee pops its arguments off - // the stack. Account for this here so these bytes can be pushed back on in - // PPCRegisterInfo::eliminateCallFramePseudoInstr. - int BytesCalleePops = - (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0; - - if (InFlag.getNode()) - Ops.push_back(InFlag); - - // Emit tail call. - if (isTailCall) { - assert(InFlag.getNode() && - "Flag must be set. Depend on flag being set in LowerRET"); - Chain = DAG.getNode(PPCISD::TAILCALL, dl, - TheCall->getVTList(), &Ops[0], Ops.size()); - return SDValue(Chain.getNode(), Op.getResNo()); - } - - Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); - - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(BytesCalleePops, true), - InFlag); - if (TheCall->getValueType(0) != MVT::Other) - InFlag = Chain.getValue(1); - - SmallVector<SDValue, 16> ResultVals; - SmallVector<CCValAssign, 16> RVLocs; - unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv(); - CCState CCInfo(CallerCC, isVarArg, TM, RVLocs); - CCInfo.AnalyzeCallResult(TheCall, RetCC_PPC); - - // Copy all of the result registers out of their specified physreg. - for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { - CCValAssign &VA = RVLocs[i]; - MVT VT = VA.getValVT(); - assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyFromReg(Chain, dl, - VA.getLocReg(), VT, InFlag).getValue(1); - ResultVals.push_back(Chain.getValue(0)); - InFlag = Chain.getValue(2); - } - - // If the function returns void, just return the chain. - if (RVLocs.empty()) - return Chain; - - // Otherwise, merge everything together with a MERGE_VALUES node. - ResultVals.push_back(Chain); - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(), - &ResultVals[0], ResultVals.size()); - return Res.getValue(Op.getResNo()); + return FinishCall(DAG, TheCall, TM, RegsToPass, Op, InFlag, Chain, Callee, + SPDiff, NumBytes); } SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG, @@ -2716,7 +3152,7 @@ SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool IsPPC64 = PPCSubTarget.isPPC64(); - bool isMachoABI = PPCSubTarget.isMachoABI(); + bool isDarwinABI = PPCSubTarget.isDarwinABI(); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Get current frame pointer save index. The users of this index will be @@ -2727,7 +3163,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { // If the frame pointer save index hasn't been defined yet. if (!RASI) { // Find out what the fix offset of the frame pointer save area. - int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isMachoABI); + int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset); // Save the result. @@ -2740,7 +3176,7 @@ SDValue PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool IsPPC64 = PPCSubTarget.isPPC64(); - bool isMachoABI = PPCSubTarget.isMachoABI(); + bool isDarwinABI = PPCSubTarget.isDarwinABI(); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Get current frame pointer save index. The users of this index will be @@ -2751,7 +3187,8 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { // If the frame pointer save index hasn't been defined yet. if (!FPSI) { // Find out what the fix offset of the frame pointer save area. - int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isMachoABI); + int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, + isDarwinABI); // Allocate the frame index for frame pointer save area. FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset); @@ -3729,12 +4166,22 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); case ISD::FORMAL_ARGUMENTS: - return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex, - VarArgsStackOffset, VarArgsNumGPR, - VarArgsNumFPR, PPCSubTarget); + if (PPCSubTarget.isSVR4ABI()) { + return LowerFORMAL_ARGUMENTS_SVR4(Op, DAG, VarArgsFrameIndex, + VarArgsStackOffset, VarArgsNumGPR, + VarArgsNumFPR, PPCSubTarget); + } else { + return LowerFORMAL_ARGUMENTS_Darwin(Op, DAG, VarArgsFrameIndex, + PPCSubTarget); + } - case ISD::CALL: return LowerCALL(Op, DAG, PPCSubTarget, - getTargetMachine()); + case ISD::CALL: + if (PPCSubTarget.isSVR4ABI()) { + return LowerCALL_SVR4(Op, DAG, PPCSubTarget, getTargetMachine()); + } else { + return LowerCALL_Darwin(Op, DAG, PPCSubTarget, getTargetMachine()); + } + case ISD::RET: return LowerRET(Op, DAG, getTargetMachine()); case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); case ISD::DYNAMIC_STACKALLOC: @@ -4878,3 +5325,13 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The PowerPC target isn't yet aware of offsets. return false; } + +MVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align, + bool isSrcConst, bool isSrcStr, + SelectionDAG &DAG) const { + if (this->PPCSubTarget.isPPC64()) { + return MVT::i64; + } else { + return MVT::i32; + } +} diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index b6d046f..962bbb1 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -82,7 +82,7 @@ namespace llvm { STD_32, /// CALL - A direct function call. - CALL_Macho, CALL_ELF, + CALL_Darwin, CALL_SVR4, /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a /// MTCTR instruction. @@ -90,7 +90,7 @@ namespace llvm { /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a /// BCTRL instruction. - BCTRL_Macho, BCTRL_ELF, + BCTRL_Darwin, BCTRL_SVR4, /// Return with a flag operand, matched by 'blr' RET_FLAG, @@ -223,7 +223,6 @@ namespace llvm { // register for parameter passing. unsigned VarArgsNumFPR; // Index of the first unused double // register for parameter passing. - int ReturnAddrIndex; // FrameIndex for return slot. const PPCSubtarget &PPCSubTarget; public: explicit PPCTargetLowering(PPCTargetMachine &TM); @@ -336,6 +335,13 @@ namespace llvm { SelectionDAG &DAG) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + + virtual MVT getOptimalMemOpType(uint64_t Size, unsigned Align, + bool isSrcConst, bool isSrcStr, + SelectionDAG &DAG) const; + + /// getFunctionAlignment - Return the Log2 alignment of this function. + virtual unsigned getFunctionAlignment(const Function *F) const; private: SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; @@ -346,6 +352,7 @@ namespace llvm { SDValue Chain, SDValue &LROpOut, SDValue &FPOpOut, + bool isDarwinABI, DebugLoc dl); SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG); @@ -363,14 +370,19 @@ namespace llvm { SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, int VarArgsFrameIndex, int VarArgsStackOffset, unsigned VarArgsNumGPR, unsigned VarArgsNumFPR, const PPCSubtarget &Subtarget); - SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, - int &VarArgsFrameIndex, - int &VarArgsStackOffset, - unsigned &VarArgsNumGPR, - unsigned &VarArgsNumFPR, - const PPCSubtarget &Subtarget); - SDValue LowerCALL(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget, TargetMachine &TM); + SDValue LowerFORMAL_ARGUMENTS_SVR4(SDValue Op, SelectionDAG &DAG, + int &VarArgsFrameIndex, + int &VarArgsStackOffset, + unsigned &VarArgsNumGPR, + unsigned &VarArgsNumFPR, + const PPCSubtarget &Subtarget); + SDValue LowerFORMAL_ARGUMENTS_Darwin(SDValue Op, SelectionDAG &DAG, + int &VarArgsFrameIndex, + const PPCSubtarget &Subtarget); + SDValue LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget, TargetMachine &TM); + SDValue LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget, TargetMachine &TM); SDValue LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM); SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget); diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 417c8ed..3823e53 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -61,7 +61,7 @@ let Defs = [LR8] in def MovePCtoLR8 : Pseudo<(outs), (ins piclabel:$label), "bl $label", []>, PPC970_Unit_BRU; -// Macho ABI Calls. +// Darwin ABI Calls. let isCall = 1, PPC970_Unit = 7, // All calls clobber the PPC64 non-callee saved registers. Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12, @@ -71,22 +71,22 @@ let isCall = 1, PPC970_Unit = 7, CR0,CR1,CR5,CR6,CR7] in { // Convenient aliases for call instructions let Uses = [RM] in { - def BL8_Macho : IForm<18, 0, 1, - (outs), (ins calltarget:$func, variable_ops), - "bl $func", BrB, []>; // See Pat patterns below. - def BLA8_Macho : IForm<18, 1, 1, - (outs), (ins aaddr:$func, variable_ops), - "bla $func", BrB, [(PPCcall_Macho (i64 imm:$func))]>; + def BL8_Darwin : IForm<18, 0, 1, + (outs), (ins calltarget:$func, variable_ops), + "bl $func", BrB, []>; // See Pat patterns below. + def BLA8_Darwin : IForm<18, 1, 1, + (outs), (ins aaddr:$func, variable_ops), + "bla $func", BrB, [(PPCcall_Darwin (i64 imm:$func))]>; } let Uses = [CTR8, RM] in { - def BCTRL8_Macho : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins variable_ops), - "bctrl", BrB, - [(PPCbctrl_Macho)]>, Requires<[In64BitMode]>; + def BCTRL8_Darwin : XLForm_2_ext<19, 528, 20, 0, 1, + (outs), (ins variable_ops), + "bctrl", BrB, + [(PPCbctrl_Darwin)]>, Requires<[In64BitMode]>; } } -// ELF 64 ABI Calls = Macho ABI Calls +// ELF 64 ABI Calls = Darwin ABI Calls // Used to define BL8_ELF and BLA8_ELF let isCall = 1, PPC970_Unit = 7, // All calls clobber the PPC64 non-callee saved registers. @@ -102,26 +102,26 @@ let isCall = 1, PPC970_Unit = 7, "bl $func", BrB, []>; // See Pat patterns below. def BLA8_ELF : IForm<18, 1, 1, (outs), (ins aaddr:$func, variable_ops), - "bla $func", BrB, [(PPCcall_ELF (i64 imm:$func))]>; + "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>; } let Uses = [CTR8, RM] in { def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins variable_ops), "bctrl", BrB, - [(PPCbctrl_ELF)]>, Requires<[In64BitMode]>; + [(PPCbctrl_SVR4)]>, Requires<[In64BitMode]>; } } // Calls -def : Pat<(PPCcall_Macho (i64 tglobaladdr:$dst)), - (BL8_Macho tglobaladdr:$dst)>; -def : Pat<(PPCcall_Macho (i64 texternalsym:$dst)), - (BL8_Macho texternalsym:$dst)>; +def : Pat<(PPCcall_Darwin (i64 tglobaladdr:$dst)), + (BL8_Darwin tglobaladdr:$dst)>; +def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)), + (BL8_Darwin texternalsym:$dst)>; -def : Pat<(PPCcall_ELF (i64 tglobaladdr:$dst)), +def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)), (BL8_ELF tglobaladdr:$dst)>; -def : Pat<(PPCcall_ELF (i64 texternalsym:$dst)), +def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)), (BL8_ELF texternalsym:$dst)>; // Atomic operations diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 778f034..87c612a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -444,21 +444,29 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, // not cause any bug. If we need other uses of CR bits, the following // code may be invalid. unsigned Reg = 0; - if (SrcReg >= PPC::CR0LT || SrcReg <= PPC::CR0UN) + if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT || + SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN) Reg = PPC::CR0; - else if (SrcReg >= PPC::CR1LT || SrcReg <= PPC::CR1UN) + else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT || + SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN) Reg = PPC::CR1; - else if (SrcReg >= PPC::CR2LT || SrcReg <= PPC::CR2UN) + else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT || + SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN) Reg = PPC::CR2; - else if (SrcReg >= PPC::CR3LT || SrcReg <= PPC::CR3UN) + else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT || + SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN) Reg = PPC::CR3; - else if (SrcReg >= PPC::CR4LT || SrcReg <= PPC::CR4UN) + else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT || + SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN) Reg = PPC::CR4; - else if (SrcReg >= PPC::CR5LT || SrcReg <= PPC::CR5UN) + else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT || + SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN) Reg = PPC::CR5; - else if (SrcReg >= PPC::CR6LT || SrcReg <= PPC::CR6UN) + else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT || + SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN) Reg = PPC::CR6; - else if (SrcReg >= PPC::CR7LT || SrcReg <= PPC::CR7UN) + else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT || + SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN) Reg = PPC::CR7; return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, @@ -587,21 +595,29 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, } else if (RC == PPC::CRBITRCRegisterClass) { unsigned Reg = 0; - if (DestReg >= PPC::CR0LT || DestReg <= PPC::CR0UN) + if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT || + DestReg == PPC::CR0EQ || DestReg == PPC::CR0UN) Reg = PPC::CR0; - else if (DestReg >= PPC::CR1LT || DestReg <= PPC::CR1UN) + else if (DestReg == PPC::CR1LT || DestReg == PPC::CR1GT || + DestReg == PPC::CR1EQ || DestReg == PPC::CR1UN) Reg = PPC::CR1; - else if (DestReg >= PPC::CR2LT || DestReg <= PPC::CR2UN) + else if (DestReg == PPC::CR2LT || DestReg == PPC::CR2GT || + DestReg == PPC::CR2EQ || DestReg == PPC::CR2UN) Reg = PPC::CR2; - else if (DestReg >= PPC::CR3LT || DestReg <= PPC::CR3UN) + else if (DestReg == PPC::CR3LT || DestReg == PPC::CR3GT || + DestReg == PPC::CR3EQ || DestReg == PPC::CR3UN) Reg = PPC::CR3; - else if (DestReg >= PPC::CR4LT || DestReg <= PPC::CR4UN) + else if (DestReg == PPC::CR4LT || DestReg == PPC::CR4GT || + DestReg == PPC::CR4EQ || DestReg == PPC::CR4UN) Reg = PPC::CR4; - else if (DestReg >= PPC::CR5LT || DestReg <= PPC::CR5UN) + else if (DestReg == PPC::CR5LT || DestReg == PPC::CR5GT || + DestReg == PPC::CR5EQ || DestReg == PPC::CR5UN) Reg = PPC::CR5; - else if (DestReg >= PPC::CR6LT || DestReg <= PPC::CR6UN) + else if (DestReg == PPC::CR6LT || DestReg == PPC::CR6GT || + DestReg == PPC::CR6EQ || DestReg == PPC::CR6UN) Reg = PPC::CR6; - else if (DestReg >= PPC::CR7LT || DestReg <= PPC::CR7UN) + else if (DestReg == PPC::CR7LT || DestReg == PPC::CR7GT || + DestReg == PPC::CR7EQ || DestReg == PPC::CR7UN) Reg = PPC::CR7; return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, @@ -691,16 +707,21 @@ MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (OpNum == 0) { // move -> store unsigned InReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STW)) - .addReg(InReg, getKillRegState(isKill)), + .addReg(InReg, + getKillRegState(isKill) | + getUndefRegState(isUndef)), FrameIndex); } else { // move -> load unsigned OutReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LWZ)) .addReg(OutReg, RegState::Define | - getDeadRegState(isDead)), + getDeadRegState(isDead) | + getUndefRegState(isUndef)), FrameIndex); } } else if ((Opc == PPC::OR8 && @@ -708,48 +729,63 @@ MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (OpNum == 0) { // move -> store unsigned InReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STD)) - .addReg(InReg, getKillRegState(isKill)), + .addReg(InReg, + getKillRegState(isKill) | + getUndefRegState(isUndef)), FrameIndex); } else { // move -> load unsigned OutReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LD)) .addReg(OutReg, RegState::Define | - getDeadRegState(isDead)), + getDeadRegState(isDead) | + getUndefRegState(isUndef)), FrameIndex); } } else if (Opc == PPC::FMRD) { if (OpNum == 0) { // move -> store unsigned InReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFD)) - .addReg(InReg, getKillRegState(isKill)), + .addReg(InReg, + getKillRegState(isKill) | + getUndefRegState(isUndef)), FrameIndex); } else { // move -> load unsigned OutReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFD)) .addReg(OutReg, RegState::Define | - getDeadRegState(isDead)), + getDeadRegState(isDead) | + getUndefRegState(isUndef)), FrameIndex); } } else if (Opc == PPC::FMRS) { if (OpNum == 0) { // move -> store unsigned InReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFS)) - .addReg(InReg, getKillRegState(isKill)), + .addReg(InReg, + getKillRegState(isKill) | + getUndefRegState(isUndef)), FrameIndex); } else { // move -> load unsigned OutReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFS)) .addReg(OutReg, RegState::Define | - getDeadRegState(isDead)), + getDeadRegState(isDead) | + getUndefRegState(isUndef)), FrameIndex); } } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 772e25a..7af59a2 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -107,17 +107,17 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; -def PPCcall_Macho : SDNode<"PPCISD::CALL_Macho", SDT_PPCCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; -def PPCcall_ELF : SDNode<"PPCISD::CALL_ELF", SDT_PPCCall, +def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; -def PPCbctrl_Macho : SDNode<"PPCISD::BCTRL_Macho", SDTNone, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; -def PPCbctrl_ELF : SDNode<"PPCISD::BCTRL_ELF", SDTNone, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def PPCbctrl_SVR4 : SDNode<"PPCISD::BCTRL_SVR4", SDTNone, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; @@ -412,7 +412,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>; } -// Macho ABI Calls. +// Darwin ABI Calls. let isCall = 1, PPC970_Unit = 7, // All calls clobber the non-callee saved registers... Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12, @@ -424,26 +424,26 @@ let isCall = 1, PPC970_Unit = 7, CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN] in { // Convenient aliases for call instructions let Uses = [RM] in { - def BL_Macho : IForm<18, 0, 1, - (outs), (ins calltarget:$func, variable_ops), - "bl $func", BrB, []>; // See Pat patterns below. - def BLA_Macho : IForm<18, 1, 1, + def BL_Darwin : IForm<18, 0, 1, + (outs), (ins calltarget:$func, variable_ops), + "bl $func", BrB, []>; // See Pat patterns below. + def BLA_Darwin : IForm<18, 1, 1, (outs), (ins aaddr:$func, variable_ops), - "bla $func", BrB, [(PPCcall_Macho (i32 imm:$func))]>; + "bla $func", BrB, [(PPCcall_Darwin (i32 imm:$func))]>; } let Uses = [CTR, RM] in { - def BCTRL_Macho : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins variable_ops), - "bctrl", BrB, - [(PPCbctrl_Macho)]>, Requires<[In32BitMode]>; + def BCTRL_Darwin : XLForm_2_ext<19, 528, 20, 0, 1, + (outs), (ins variable_ops), + "bctrl", BrB, + [(PPCbctrl_Darwin)]>, Requires<[In32BitMode]>; } } -// ELF ABI Calls. +// SVR4 ABI Calls. let isCall = 1, PPC970_Unit = 7, // All calls clobber the non-callee saved registers... - Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12, - F0,F1,F2,F3,F4,F5,F6,F7,F8, + Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12, + F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, LR,CTR, CR0,CR1,CR5,CR6,CR7, @@ -451,19 +451,19 @@ let isCall = 1, PPC970_Unit = 7, CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN] in { // Convenient aliases for call instructions let Uses = [RM] in { - def BL_ELF : IForm<18, 0, 1, + def BL_SVR4 : IForm<18, 0, 1, (outs), (ins calltarget:$func, variable_ops), "bl $func", BrB, []>; // See Pat patterns below. - def BLA_ELF : IForm<18, 1, 1, + def BLA_SVR4 : IForm<18, 1, 1, (outs), (ins aaddr:$func, variable_ops), "bla $func", BrB, - [(PPCcall_ELF (i32 imm:$func))]>; + [(PPCcall_SVR4 (i32 imm:$func))]>; } let Uses = [CTR, RM] in { - def BCTRL_ELF : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins variable_ops), - "bctrl", BrB, - [(PPCbctrl_ELF)]>, Requires<[In32BitMode]>; + def BCTRL_SVR4 : XLForm_2_ext<19, 528, 20, 0, 1, + (outs), (ins variable_ops), + "bctrl", BrB, + [(PPCbctrl_SVR4)]>, Requires<[In32BitMode]>; } } @@ -1389,14 +1389,14 @@ def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm), (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>; // Calls -def : Pat<(PPCcall_Macho (i32 tglobaladdr:$dst)), - (BL_Macho tglobaladdr:$dst)>; -def : Pat<(PPCcall_Macho (i32 texternalsym:$dst)), - (BL_Macho texternalsym:$dst)>; -def : Pat<(PPCcall_ELF (i32 tglobaladdr:$dst)), - (BL_ELF tglobaladdr:$dst)>; -def : Pat<(PPCcall_ELF (i32 texternalsym:$dst)), - (BL_ELF texternalsym:$dst)>; +def : Pat<(PPCcall_Darwin (i32 tglobaladdr:$dst)), + (BL_Darwin tglobaladdr:$dst)>; +def : Pat<(PPCcall_Darwin (i32 texternalsym:$dst)), + (BL_Darwin texternalsym:$dst)>; +def : Pat<(PPCcall_SVR4 (i32 tglobaladdr:$dst)), + (BL_SVR4 tglobaladdr:$dst)>; +def : Pat<(PPCcall_SVR4 (i32 texternalsym:$dst)), + (BL_SVR4 texternalsym:$dst)>; def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index 035647e..7486d74 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -354,7 +354,7 @@ void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn, JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1) JCE.emitWordBE(0x7d6802a6); // mflr r11 JCE.emitWordBE(0xf9610060); // std r11, 96(r1) - } else if (TM.getSubtargetImpl()->isMachoABI()){ + } else if (TM.getSubtargetImpl()->isDarwinABI()){ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1) JCE.emitWordBE(0x7d6802a6); // mflr r11 JCE.emitWordBE(0x91610028); // stw r11, 40(r1) diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index cb31506..97b1c57 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -149,7 +149,7 @@ const TargetRegisterClass *PPCRegisterInfo::getPointerRegClass() const { const unsigned* PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { // 32-bit Darwin calling convention. - static const unsigned Macho32_CalleeSavedRegs[] = { + static const unsigned Darwin32_CalleeSavedRegs[] = { PPC::R13, PPC::R14, PPC::R15, PPC::R16, PPC::R17, PPC::R18, PPC::R19, PPC::R20, PPC::R21, PPC::R22, PPC::R23, @@ -174,15 +174,13 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::LR, 0 }; - static const unsigned ELF32_CalleeSavedRegs[] = { - PPC::R13, PPC::R14, PPC::R15, + static const unsigned SVR4_CalleeSavedRegs[] = { + PPC::R14, PPC::R15, PPC::R16, PPC::R17, PPC::R18, PPC::R19, PPC::R20, PPC::R21, PPC::R22, PPC::R23, PPC::R24, PPC::R25, PPC::R26, PPC::R27, PPC::R28, PPC::R29, PPC::R30, PPC::R31, - PPC::F9, - PPC::F10, PPC::F11, PPC::F12, PPC::F13, PPC::F14, PPC::F15, PPC::F16, PPC::F17, PPC::F18, PPC::F19, PPC::F20, PPC::F21, PPC::F22, PPC::F23, PPC::F24, PPC::F25, @@ -190,6 +188,9 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::F30, PPC::F31, PPC::CR2, PPC::CR3, PPC::CR4, + + PPC::VRSAVE, + PPC::V20, PPC::V21, PPC::V22, PPC::V23, PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31, @@ -201,7 +202,7 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::LR, 0 }; // 64-bit Darwin calling convention. - static const unsigned Macho64_CalleeSavedRegs[] = { + static const unsigned Darwin64_CalleeSavedRegs[] = { PPC::X14, PPC::X15, PPC::X16, PPC::X17, PPC::X18, PPC::X19, PPC::X20, PPC::X21, PPC::X22, PPC::X23, @@ -226,18 +227,17 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::LR8, 0 }; - if (Subtarget.isMachoABI()) - return Subtarget.isPPC64() ? Macho64_CalleeSavedRegs : - Macho32_CalleeSavedRegs; + if (Subtarget.isDarwinABI()) + return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs : + Darwin32_CalleeSavedRegs; - // ELF 32. - return ELF32_CalleeSavedRegs; + return SVR4_CalleeSavedRegs; } const TargetRegisterClass* const* PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - // 32-bit Macho calling convention. - static const TargetRegisterClass * const Macho32_CalleeSavedRegClasses[] = { + // 32-bit Darwin calling convention. + static const TargetRegisterClass * const Darwin32_CalleeSavedRegClasses[] = { &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, @@ -266,15 +266,13 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &PPC::GPRCRegClass, 0 }; - static const TargetRegisterClass * const ELF32_CalleeSavedRegClasses[] = { - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, + static const TargetRegisterClass * const SVR4_CalleeSavedRegClasses[] = { + &PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, @@ -283,6 +281,8 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass, + &PPC::VRSAVERCRegClass, + &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, @@ -297,8 +297,8 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &PPC::GPRCRegClass, 0 }; - // 64-bit Macho calling convention. - static const TargetRegisterClass * const Macho64_CalleeSavedRegClasses[] = { + // 64-bit Darwin calling convention. + static const TargetRegisterClass * const Darwin64_CalleeSavedRegClasses[] = { &PPC::G8RCRegClass,&PPC::G8RCRegClass, &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, @@ -327,12 +327,11 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &PPC::G8RCRegClass, 0 }; - if (Subtarget.isMachoABI()) - return Subtarget.isPPC64() ? Macho64_CalleeSavedRegClasses : - Macho32_CalleeSavedRegClasses; + if (Subtarget.isDarwinABI()) + return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegClasses : + Darwin32_CalleeSavedRegClasses; - // ELF 32. - return ELF32_CalleeSavedRegClasses; + return SVR4_CalleeSavedRegClasses; } // needsFP - Return true if the specified function should have a dedicated frame @@ -358,10 +357,12 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::LR8); Reserved.set(PPC::RM); - // In Linux, r2 is reserved for the OS. - if (!Subtarget.isDarwin()) - Reserved.set(PPC::R2); - + // The SVR4 ABI reserves r2 and r13 + if (Subtarget.isSVR4ABI()) { + Reserved.set(PPC::R2); // System-reserved register + Reserved.set(PPC::R13); // Small Data Area pointer register + } + // On PPC64, r13 is the thread pointer. Never allocate this register. Note // that this is over conservative, as it also prevents allocation of R31 when // the FP is not needed. @@ -909,6 +910,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const { // don't have a frame pointer, calls, or dynamic alloca then we do not need // to adjust the stack pointer (we fit in the Red Zone). bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone); + // FIXME SVR4 The SVR4 ABI has no red zone. if (!DisableRedZone && FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. @@ -925,7 +927,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const { // Maximum call frame needs to be at least big enough for linkage and 8 args. unsigned minCallFrameSize = PPCFrameInfo::getMinCallFrameSize(Subtarget.isPPC64(), - Subtarget.isMachoABI()); + Subtarget.isDarwinABI()); maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); // If we have dynamic alloca then maxCallFrameSize needs to be aligned so @@ -958,16 +960,15 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Save R31 if necessary int FPSI = FI->getFramePointerSaveIndex(); bool IsPPC64 = Subtarget.isPPC64(); - bool IsELF32_ABI = Subtarget.isELF32_ABI(); - bool IsMachoABI = Subtarget.isMachoABI(); + bool IsSVR4ABI = Subtarget.isSVR4ABI(); + bool isDarwinABI = Subtarget.isDarwinABI(); MachineFrameInfo *MFI = MF.getFrameInfo(); // If the frame pointer save index hasn't been defined yet. - if (!FPSI && (NoFramePointerElim || MFI->hasVarSizedObjects()) && - IsELF32_ABI) { + if (!FPSI && needsFP(MF) && IsSVR4ABI) { // Find out what the fix offset of the frame pointer save area. int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, - IsMachoABI); + isDarwinABI); // Allocate the frame index for frame pointer save area. FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset); // Save the result. @@ -976,11 +977,10 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Reserve stack space to move the linkage area to in case of a tail call. int TCSPDelta = 0; - if (PerformTailCallOpt && (TCSPDelta=FI->getTailCallSPDelta()) < 0) { - int AddFPOffsetAmount = IsELF32_ABI ? -4 : 0; - MF.getFrameInfo()->CreateFixedObject( -1 * TCSPDelta, - AddFPOffsetAmount + TCSPDelta); + if (PerformTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) { + MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta); } + // Reserve a slot closest to SP or frame pointer if we have a dynalloc or // a large stack, which will require scavenging a register to materialize a // large offset. @@ -999,6 +999,170 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } void +PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) + const { + // Early exit if not using the SVR4 ABI. + if (!Subtarget.isSVR4ABI()) { + return; + } + + // Get callee saved register information. + MachineFrameInfo *FFI = MF.getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo(); + + // Early exit if no callee saved registers are modified! + if (CSI.empty() && !needsFP(MF)) { + return; + } + + unsigned MinGPR = PPC::R31; + unsigned MinFPR = PPC::F31; + unsigned MinVR = PPC::V31; + + bool HasGPSaveArea = false; + bool HasFPSaveArea = false; + bool HasCRSaveArea = false; + bool HasVRSAVESaveArea = false; + bool HasVRSaveArea = false; + + SmallVector<CalleeSavedInfo, 18> GPRegs; + SmallVector<CalleeSavedInfo, 18> FPRegs; + SmallVector<CalleeSavedInfo, 18> VRegs; + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = CSI[i].getRegClass(); + + if (RC == PPC::GPRCRegisterClass) { + HasGPSaveArea = true; + + GPRegs.push_back(CSI[i]); + + if (Reg < MinGPR) { + MinGPR = Reg; + } + } else if (RC == PPC::F8RCRegisterClass) { + HasFPSaveArea = true; + + FPRegs.push_back(CSI[i]); + + if (Reg < MinFPR) { + MinFPR = Reg; + } +// FIXME SVR4: Disable CR save area for now. + } else if ( RC == PPC::CRBITRCRegisterClass + || RC == PPC::CRRCRegisterClass) { +// HasCRSaveArea = true; + } else if (RC == PPC::VRSAVERCRegisterClass) { + HasVRSAVESaveArea = true; + } else if (RC == PPC::VRRCRegisterClass) { + HasVRSaveArea = true; + + VRegs.push_back(CSI[i]); + + if (Reg < MinVR) { + MinVR = Reg; + } + } else { + assert(0 && "Unknown RegisterClass!"); + } + } + + PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); + + int64_t LowerBound = 0; + + // Take into account stack space reserved for tail calls. + int TCSPDelta = 0; + if (PerformTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { + LowerBound = TCSPDelta; + } + + // The Floating-point register save area is right below the back chain word + // of the previous stack frame. + if (HasFPSaveArea) { + for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { + int FI = FPRegs[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + + LowerBound -= (31 - getRegisterNumbering(MinFPR) + 1) * 8; + } + + // Check whether the frame pointer register is allocated. If so, make sure it + // is spilled to the correct offset. + if (needsFP(MF)) { + HasGPSaveArea = true; + + int FI = PFI->getFramePointerSaveIndex(); + assert(FI && "No Frame Pointer Save Slot!"); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + + // General register save area starts right below the Floating-point + // register save area. + if (HasGPSaveArea) { + // Move general register save area spill slots down, taking into account + // the size of the Floating-point register save area. + for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { + int FI = GPRegs[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + + LowerBound -= (31 - getRegisterNumbering(MinGPR) + 1) * 4; + } + + // The CR save area is below the general register save area. + if (HasCRSaveArea) { + // FIXME SVR4: Is it actually possible to have multiple elements in CSI + // which have the CR/CRBIT register class? + // Adjust the frame index of the CR spill slot. + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + const TargetRegisterClass *RC = CSI[i].getRegClass(); + + if (RC == PPC::CRBITRCRegisterClass || RC == PPC::CRRCRegisterClass) { + int FI = CSI[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + } + + LowerBound -= 4; // The CR save area is always 4 bytes long. + } + + if (HasVRSAVESaveArea) { + // FIXME SVR4: Is it actually possible to have multiple elements in CSI + // which have the VRSAVE register class? + // Adjust the frame index of the VRSAVE spill slot. + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + const TargetRegisterClass *RC = CSI[i].getRegClass(); + + if (RC == PPC::VRSAVERCRegisterClass) { + int FI = CSI[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + } + + LowerBound -= 4; // The VRSAVE save area is always 4 bytes long. + } + + if (HasVRSaveArea) { + // Insert alignment padding, we need 16-byte alignment. + LowerBound = (LowerBound - 15) & ~(15); + + for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { + int FI = VRegs[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + } +} + +void PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -1033,15 +1197,26 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { // Get processor type. bool IsPPC64 = Subtarget.isPPC64(); // Get operating system - bool IsMachoABI = Subtarget.isMachoABI(); + bool isDarwinABI = Subtarget.isDarwinABI(); // Check if the link register (LR) must be saved. PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); bool MustSaveLR = FI->mustSaveLR(); // Do we have a frame pointer for this function? bool HasFP = hasFP(MF) && FrameSize; - int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, IsMachoABI); - int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, IsMachoABI); + int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI); + + int FPOffset = 0; + if (HasFP) { + if (Subtarget.isSVR4ABI()) { + MachineFrameInfo *FFI = MF.getFrameInfo(); + int FPIndex = FI->getFramePointerSaveIndex(); + assert(FPIndex && "No Frame Pointer Save Slot!"); + FPOffset = FFI->getObjectOffset(FPIndex); + } else { + FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isDarwinABI); + } + } if (IsPPC64) { if (MustSaveLR) @@ -1242,15 +1417,26 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, // Get processor type. bool IsPPC64 = Subtarget.isPPC64(); // Get operating system - bool IsMachoABI = Subtarget.isMachoABI(); + bool isDarwinABI = Subtarget.isDarwinABI(); // Check if the link register (LR) has been saved. PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); bool MustSaveLR = FI->mustSaveLR(); // Do we have a frame pointer for this function? bool HasFP = hasFP(MF) && FrameSize; - int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, IsMachoABI); - int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, IsMachoABI); + int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI); + + int FPOffset = 0; + if (HasFP) { + if (Subtarget.isSVR4ABI()) { + MachineFrameInfo *FFI = MF.getFrameInfo(); + int FPIndex = FI->getFramePointerSaveIndex(); + assert(FPIndex && "No Frame Pointer Save Slot!"); + FPOffset = FFI->getObjectOffset(FPIndex); + } else { + FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isDarwinABI); + } + } bool UsesTCRet = RetOpcode == PPC::TCRETURNri || RetOpcode == PPC::TCRETURNdi || diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 9506b65..ddaefdd 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -75,6 +75,8 @@ public: void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 9e15a55..bac8e3a 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -43,8 +43,9 @@ class VR<bits<5> num, string n> : PPCReg<n> { } // CR - One of the 8 4-bit condition registers -class CR<bits<3> num, string n> : PPCReg<n> { +class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> { field bits<3> Num = num; + let SubRegs = subregs; } // CRBIT - One of the 32 1-bit condition register fields @@ -189,16 +190,6 @@ def V29 : VR<29, "v29">, DwarfRegNum<[106]>; def V30 : VR<30, "v30">, DwarfRegNum<[107]>; def V31 : VR<31, "v31">, DwarfRegNum<[108]>; -// Condition registers -def CR0 : CR<0, "cr0">, DwarfRegNum<[68]>; -def CR1 : CR<1, "cr1">, DwarfRegNum<[69]>; -def CR2 : CR<2, "cr2">, DwarfRegNum<[70]>; -def CR3 : CR<3, "cr3">, DwarfRegNum<[71]>; -def CR4 : CR<4, "cr4">, DwarfRegNum<[72]>; -def CR5 : CR<5, "cr5">, DwarfRegNum<[73]>; -def CR6 : CR<6, "cr6">, DwarfRegNum<[74]>; -def CR7 : CR<7, "cr7">, DwarfRegNum<[75]>; - // Condition register bits def CR0LT : CRBIT< 0, "0">, DwarfRegNum<[0]>; def CR0GT : CRBIT< 1, "1">, DwarfRegNum<[0]>; @@ -233,6 +224,16 @@ def CR7GT : CRBIT<29, "29">, DwarfRegNum<[0]>; def CR7EQ : CRBIT<30, "30">, DwarfRegNum<[0]>; def CR7UN : CRBIT<31, "31">, DwarfRegNum<[0]>; +// Condition registers +def CR0 : CR<0, "cr0", [CR0LT, CR0GT, CR0EQ, CR0UN]>, DwarfRegNum<[68]>; +def CR1 : CR<1, "cr1", [CR1LT, CR1GT, CR1EQ, CR1UN]>, DwarfRegNum<[69]>; +def CR2 : CR<2, "cr2", [CR2LT, CR2GT, CR2EQ, CR2UN]>, DwarfRegNum<[70]>; +def CR3 : CR<3, "cr3", [CR3LT, CR3GT, CR3EQ, CR3UN]>, DwarfRegNum<[71]>; +def CR4 : CR<4, "cr4", [CR4LT, CR4GT, CR4EQ, CR4UN]>, DwarfRegNum<[72]>; +def CR5 : CR<5, "cr5", [CR5LT, CR5GT, CR5EQ, CR5UN]>, DwarfRegNum<[73]>; +def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74]>; +def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75]>; + def : SubRegSet<1, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7], [CR0LT, CR1LT, CR2LT, CR3LT, CR4LT, CR5LT, CR6LT, CR7LT]>; def : SubRegSet<2, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7], @@ -290,7 +291,12 @@ def GPRC : RegisterClass<"PPC", [i32], 32, // On PPC64, r13 is the thread pointer. Never allocate this register. // Note that this is overconservative, as it also prevents allocation of // R31 when the FP is not needed. - if (MF.getTarget().getSubtarget<PPCSubtarget>().isPPC64()) + // When using the SVR4 ABI, r13 is reserved for the Small Data Area + // pointer. + const PPCSubtarget &Subtarget + = MF.getTarget().getSubtarget<PPCSubtarget>(); + + if (Subtarget.isPPC64() || Subtarget.isSVR4ABI()) return end()-5; // don't allocate R13, R31, R0, R1, LR if (needsFP(MF)) @@ -324,22 +330,24 @@ def G8RC : RegisterClass<"PPC", [i64], 64, }]; } - - +// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4 +// ABI the size of the Floating-point register save area is determined by the +// allocated non-volatile register with the lowest register number, as FP +// register N is spilled to offset 8 * (32 - N) below the back chain word of the +// previous stack frame. By allocating non-volatiles in reverse order we make +// sure that the Floating-point register save area is always as small as +// possible because there aren't any unused spill slots. def F8RC : RegisterClass<"PPC", [f64], 64, [F0, F1, F2, F3, F4, F5, F6, F7, - F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, - F22, F23, F24, F25, F26, F27, F28, F29, F30, F31]>; + F8, F9, F10, F11, F12, F13, F31, F30, F29, F28, F27, F26, F25, F24, F23, + F22, F21, F20, F19, F18, F17, F16, F15, F14]>; def F4RC : RegisterClass<"PPC", [f32], 32, [F0, F1, F2, F3, F4, F5, F6, F7, - F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, - F22, F23, F24, F25, F26, F27, F28, F29, F30, F31]>; + F8, F9, F10, F11, F12, F13, F31, F30, F29, F28, F27, F26, F25, F24, F23, + F22, F21, F20, F19, F18, F17, F16, F15, F14]>; def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128, [V2, V3, V4, V5, V0, V1, - V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V20, V21, - V22, V23, V24, V25, V26, V27, V28, V29, V30, V31]>; - -def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2, - CR3, CR4]>; + V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V31, V30, + V29, V28, V27, V26, V25, V24, V23, V22, V21, V20]>; def CRBITRC : RegisterClass<"PPC", [i32], 32, [CR0LT, CR0GT, CR0EQ, CR0UN, @@ -355,6 +363,13 @@ def CRBITRC : RegisterClass<"PPC", [i32], 32, let CopyCost = -1; } +def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2, + CR3, CR4]> +{ + let SubRegClassList = [CRBITRC, CRBITRC, CRBITRC, CRBITRC]; +} def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>; def CTRRC8 : RegisterClass<"PPC", [i64], 64, [CTR8]>; +def VRSAVERC : RegisterClass<"PPC", [i32], 32, [VRSAVE]>; + diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 176f3e1..f633cc6 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -148,8 +148,8 @@ public: /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard. unsigned getDarwinVers() const { return DarwinVers; } - bool isMachoABI() const { return isDarwin() || IsPPC64; } - bool isELF32_ABI() const { return !isDarwin() && !IsPPC64; } + bool isDarwinABI() const { return isDarwin() || IsPPC64; } + bool isSVR4ABI() const { return !isDarwin() && !IsPPC64; } unsigned getAsmFlavor() const { return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 2f95d7e..e9073d6 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -152,7 +152,7 @@ bool PPCTargetMachine::addAssemblyEmitter(PassManagerBase &PM, raw_ostream &Out) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + PM.add(AsmPrinterCtor(Out, *this, Verbose)); return false; } @@ -183,7 +183,7 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -215,7 +215,7 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -230,7 +230,7 @@ bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -245,7 +245,7 @@ bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 086d2f4..c693bf4 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -46,7 +46,6 @@ protected: // set this functions to ctor pointer at startup time if they are linked in. typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, PPCTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); static AsmPrinterCtorFn AsmPrinterCtor; diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index 688fb30..6e9e6c7 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -180,6 +180,16 @@ void bar() { struct foo R = { 1.0, 2.0 }; xxx(R); } ===-------------------------------------------------------------------------=== +Darwin Stub removal: + +We still generate calls to foo$stub, and stubs, on Darwin. This is not +necessary when building with the Leopard (10.5) or later linker, as stubs are +generated by ld when necessary. Parameterizing this based on the deployment +target (-mmacosx-version-min) is probably enough. x86-32 does this right, see +its logic. + +===-------------------------------------------------------------------------=== + Darwin Stub LICM optimization: Loops like this: diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp index cb23f62..71bd0de 100644 --- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp @@ -50,9 +50,8 @@ namespace { unsigned BBNumber; public: explicit SparcAsmPrinter(raw_ostream &O, TargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V), BBNumber(0) {} + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V), BBNumber(0) {} virtual const char *getPassName() const { return "Sparc Assembly Printer"; @@ -84,9 +83,8 @@ namespace { /// FunctionPass *llvm::createSparcCodePrinterPass(raw_ostream &o, TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { - return new SparcAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); + return new SparcAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } @@ -109,7 +107,7 @@ bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Print out the label for the function. const Function *F = MF.getFunction(); SwitchToSection(TAI->SectionForGlobal(F)); - EmitAlignment(4, F); + EmitAlignment(MF.getAlignment(), F); O << "\t.globl\t" << CurrentFnName << '\n'; printVisibility(CurrentFnName, F->getVisibility()); diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index eefa7e8..eb045e2 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -21,3 +21,5 @@ add_llvm_target(SparcCodeGen SparcTargetAsmInfo.cpp SparcTargetMachine.cpp ) + +target_link_libraries (LLVMSparcCodeGen LLVMSelectionDAG) diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h index bb03f30..c7d0ca8 100644 --- a/lib/Target/Sparc/Sparc.h +++ b/lib/Target/Sparc/Sparc.h @@ -25,7 +25,6 @@ namespace llvm { FunctionPass *createSparcISelDag(SparcTargetMachine &TM); FunctionPass *createSparcCodePrinterPass(raw_ostream &OS, TargetMachine &TM, - CodeGenOpt::Level OptLevel, bool Verbose); FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM); FunctionPass *createSparcFPMoverPass(TargetMachine &TM); diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 3ec7e06..4c3efde 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -1047,3 +1047,8 @@ SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The Sparc target isn't yet aware of offsets. return false; } + +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned SparcTargetLowering::getFunctionAlignment(const Function *) const { + return 4; +} diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index fe6811f..27ce1b7 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -73,6 +73,9 @@ namespace llvm { MVT VT) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + + /// getFunctionAlignment - Return the Log2 alignment of this function. + virtual unsigned getFunctionAlignment(const Function *F) const; }; } // end namespace llvm diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index d2f6b9b..12c286a 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -256,17 +256,20 @@ MachineInstr *SparcInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (OpNum == 0) { // COPY -> STORE unsigned SrcReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(isFloat ? SP::STFri : SP::STDFri)) .addFrameIndex(FI) .addImm(0) - .addReg(SrcReg, getKillRegState(isKill)); + .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)); } else { // COPY -> LOAD unsigned DstReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(isFloat ? SP::LDFri : SP::LDDFri)) - .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addReg(DstReg, RegState::Define | + getDeadRegState(isDead) | getUndefRegState(isUndef)) .addFrameIndex(FI) .addImm(0); } diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index aef238d..1343bcc 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -90,6 +90,6 @@ bool SparcTargetMachine::addAssemblyEmitter(PassManagerBase &PM, // Output assembly language. assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + PM.add(AsmPrinterCtor(Out, *this, Verbose)); return false; } diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index 8afcc73..ee55d3c 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -39,7 +39,6 @@ protected: // set this functions to ctor pointer at startup time if they are linked in. typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); static AsmPrinterCtorFn AsmPrinterCtor; diff --git a/lib/Target/TargetELFWriterInfo.cpp b/lib/Target/TargetELFWriterInfo.cpp index 9651e65..3631b35 100644 --- a/lib/Target/TargetELFWriterInfo.cpp +++ b/lib/Target/TargetELFWriterInfo.cpp @@ -24,13 +24,3 @@ TargetELFWriterInfo::TargetELFWriterInfo(TargetMachine &tm) : TM(tm) { TargetELFWriterInfo::~TargetELFWriterInfo() {} -/// getFunctionAlignment - Returns the alignment for function 'F', targets -/// with different alignment constraints should overload this method -unsigned TargetELFWriterInfo::getFunctionAlignment(const Function *F) const { - const TargetData *TD = TM.getTargetData(); - unsigned FnAlign = F->getAlignment(); - unsigned TDAlign = TD->getPointerABIAlignment(); - unsigned Align = std::max(FnAlign, TDAlign); - assert(!(Align & (Align-1)) && "Alignment is not a power of two!"); - return Align; -} diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp index e75cfc5..127f228 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp @@ -154,21 +154,13 @@ void X86ATTAsmPrinter::decorateName(std::string &Name, } } - - void X86ATTAsmPrinter::emitFunctionHeader(const MachineFunction &MF) { + unsigned FnAlign = MF.getAlignment(); const Function *F = MF.getFunction(); decorateName(CurrentFnName, F); SwitchToSection(TAI->SectionForGlobal(F)); - - // FIXME: A function's alignment should be part of MachineFunction. There - // shouldn't be a policy decision here. - unsigned FnAlign = 4; - if (F->hasFnAttr(Attribute::OptimizeForSize)) - FnAlign = 1; - switch (F->getLinkage()) { default: assert(0 && "Unknown linkage type!"); case Function::InternalLinkage: // Symbols default to internal. diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h index bd96115..f47daf1 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h @@ -38,9 +38,8 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { MCStreamer *Streamer; public: explicit X86ATTAsmPrinter(raw_ostream &O, X86TargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V) { + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V) { Subtarget = &TM.getSubtarget<X86Subtarget>(); Context = 0; Streamer = 0; @@ -140,6 +139,9 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { void printi128mem(const MachineInstr *MI, unsigned OpNo) { printMemReference(MI, OpNo); } + void printi256mem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } void printf32mem(const MachineInstr *MI, unsigned OpNo) { printMemReference(MI, OpNo); } @@ -152,6 +154,9 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { void printf128mem(const MachineInstr *MI, unsigned OpNo) { printMemReference(MI, OpNo); } + void printf256mem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } void printlea32mem(const MachineInstr *MI, unsigned OpNo) { printLeaMemReference(MI, OpNo); } diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index d1623d6..e5d80a4 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -25,15 +25,12 @@ using namespace llvm; /// FunctionPass *llvm::createX86CodePrinterPass(raw_ostream &o, X86TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { const X86Subtarget *Subtarget = &tm.getSubtarget<X86Subtarget>(); if (Subtarget->isFlavorIntel()) - return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(), - OptLevel, verbose); - return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(), - OptLevel, verbose); + return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); + return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } namespace { diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp index ceae7be..9d4df93 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp @@ -132,6 +132,7 @@ bool X86IntelAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Print out labels for the function. const Function *F = MF.getFunction(); unsigned CC = F->getCallingConv(); + unsigned FnAlign = MF.getAlignment(); // Populate function information map. Actually, We don't want to populate // non-stdcall or non-fastcall functions' information right now. @@ -141,10 +142,6 @@ bool X86IntelAsmPrinter::runOnMachineFunction(MachineFunction &MF) { decorateName(CurrentFnName, F); SwitchToTextSection("_text", F); - - unsigned FnAlign = 4; - if (F->hasFnAttr(Attribute::OptimizeForSize)) - FnAlign = 1; switch (F->getLinkage()) { default: assert(0 && "Unsupported linkage type!"); case Function::PrivateLinkage: diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h index 04f2595..a724c3c 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h @@ -26,9 +26,8 @@ namespace llvm { struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter { explicit X86IntelAsmPrinter(raw_ostream &O, X86TargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V) {} + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V) {} virtual const char *getPassName() const { return "X86 Intel-Style Assembly Printer"; @@ -76,6 +75,10 @@ struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter { O << "XMMWORD PTR "; printMemReference(MI, OpNo); } + void printi256mem(const MachineInstr *MI, unsigned OpNo) { + O << "YMMWORD PTR "; + printMemReference(MI, OpNo); + } void printf32mem(const MachineInstr *MI, unsigned OpNo) { O << "DWORD PTR "; printMemReference(MI, OpNo); @@ -92,6 +95,10 @@ struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter { O << "XMMWORD PTR "; printMemReference(MI, OpNo); } + void printf256mem(const MachineInstr *MI, unsigned OpNo) { + O << "YMMWORD PTR "; + printMemReference(MI, OpNo); + } void printlea32mem(const MachineInstr *MI, unsigned OpNo) { O << "DWORD PTR "; printLeaMemReference(MI, OpNo); diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index d982990..7ea0e51 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -27,3 +27,5 @@ add_llvm_target(X86CodeGen X86TargetMachine.cpp X86FastISel.cpp ) + +target_link_libraries (LLVMX86CodeGen LLVMSelectionDAG) diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index fd13b02..22de3f6 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -46,9 +46,7 @@ FunctionPass *createX87FPRegKillInserterPass(); /// assembly code for a MachineFunction to the given output stream, /// using the given target machine description. /// -FunctionPass *createX86CodePrinterPass(raw_ostream &o, - X86TargetMachine &tm, - CodeGenOpt::Level OptLevel, +FunctionPass *createX86CodePrinterPass(raw_ostream &o, X86TargetMachine &tm, bool Verbose); /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 4d26364..47861d5 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -119,6 +119,10 @@ def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A, Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A, Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"istanbul", [Feature3DNowA, Feature64Bit, FeatureSSE4A, + Feature3DNowA]>; +def : Proc<"shanghai", [Feature3DNowA, Feature64Bit, FeatureSSE4A, + Feature3DNowA]>; def : Proc<"winchip-c6", [FeatureMMX]>; def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>; diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp index 315118f..912ab0e 100644 --- a/lib/Target/X86/X86ELFWriterInfo.cpp +++ b/lib/Target/X86/X86ELFWriterInfo.cpp @@ -59,18 +59,6 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { return 0; } -unsigned X86ELFWriterInfo::getFunctionAlignment(const Function *F) const { - unsigned FnAlign = 4; - - if (F->hasFnAttr(Attribute::OptimizeForSize)) - FnAlign = 1; - - if (F->getAlignment()) - FnAlign = Log2_32(F->getAlignment()); - - return (1 << FnAlign); -} - long int X86ELFWriterInfo::getAddendForRelTy(unsigned RelTy) const { if (is64Bit) { switch(RelTy) { diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h index 96485b8..2ba1a0b 100644 --- a/lib/Target/X86/X86ELFWriterInfo.h +++ b/lib/Target/X86/X86ELFWriterInfo.h @@ -41,10 +41,6 @@ namespace llvm { X86ELFWriterInfo(TargetMachine &TM); virtual ~X86ELFWriterInfo(); - /// getFunctionAlignment - Returns the alignment for function 'F', targets - /// with different alignment constraints should overload this method - virtual unsigned getFunctionAlignment(const Function *F) const; - /// getRelocationType - Returns the target specific ELF Relocation type. /// 'MachineRelTy' contains the object code independent relocation type virtual unsigned getRelocationType(unsigned MachineRelTy) const; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 8a21b35..b336d78 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -452,25 +452,38 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { if (Subtarget->GVRequiresExtraLoad(GV, TM, isCall)) { // Check to see if we've already materialized this // value in a register in this block. - if (unsigned Reg = LocalValueMap[V]) { - AM.Base.Reg = Reg; + DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V); + if (I != LocalValueMap.end() && I->second != 0) { + AM.Base.Reg = I->second; AM.GV = 0; return true; } - // Issue load from stub if necessary. + + // Issue load from stub. unsigned Opc = 0; const TargetRegisterClass *RC = NULL; + X86AddressMode StubAM; + StubAM.Base.Reg = AM.Base.Reg; + StubAM.GV = AM.GV; + if (TLI.getPointerTy() == MVT::i32) { Opc = X86::MOV32rm; RC = X86::GR32RegisterClass; + + if (Subtarget->isPICStyleGOT() && + TM.getRelocationModel() == Reloc::PIC_) + StubAM.GVOpFlags = X86II::MO_GOT; + } else { Opc = X86::MOV64rm; RC = X86::GR64RegisterClass; + + if (TM.getRelocationModel() != Reloc::Static) { + StubAM.GVOpFlags = X86II::MO_GOTPCREL; + StubAM.Base.Reg = X86::RIP; + } } - X86AddressMode StubAM; - StubAM.Base.Reg = AM.Base.Reg; - StubAM.GV = AM.GV; unsigned ResultReg = createResultReg(RC); addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), StubAM); @@ -1503,7 +1516,9 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) { } else if (Subtarget->isPICStyleGOT()) { OpFlag = X86II::MO_GOTOFF; PICBase = getInstrInfo()->getGlobalBaseReg(&MF); - } + } else if (Subtarget->isPICStyleRIPRel() && + TM.getCodeModel() == CodeModel::Small) + PICBase = X86::RIP; } // Create the load from the constant pool. diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index ed4eb44..37027ee 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -990,16 +990,21 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { } case X86::FpSET_ST0_32: case X86::FpSET_ST0_64: - case X86::FpSET_ST0_80: + case X86::FpSET_ST0_80: { + unsigned Op0 = getFPReg(MI->getOperand(0)); + // FpSET_ST0_80 is generated by copyRegToReg for both function return // and inline assembly with the "st" constrain. In the latter case, - // it is possible for FP0 to be alive after this instruction. - if (!MI->killsRegister(X86::FP0)) { - // Duplicate ST0 - duplicateToTop(0, 0, I); + // it is possible for ST(0) to be alive after this instruction. + if (!MI->killsRegister(X86::FP0 + Op0)) { + // Duplicate Op0 + duplicateToTop(0, 7 /*temp register*/, I); + } else { + moveToTop(Op0, I); } --StackTop; // "Forget" we have something on the top of stack! break; + } case X86::FpSET_ST1_32: case X86::FpSET_ST1_64: case X86::FpSET_ST1_80: diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9614e69..5a6294a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -700,6 +700,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Do not attempt to custom lower non-power-of-2 vectors if (!isPowerOf2_32(VT.getVectorNumElements())) continue; + // Do not attempt to custom lower non-128-bit vectors + if (!VT.is128BitVector()) + continue; setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); @@ -718,17 +721,23 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. - for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { - setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v2i64); - setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v2i64); - setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v2i64); - setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v2i64); - setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v2i64); + for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) { + MVT VT = (MVT::SimpleValueType)i; + + // Do not attempt to promote non-128-bit vectors + if (!VT.is128BitVector()) { + continue; + } + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, MVT::v2i64); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, MVT::v2i64); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, MVT::v2i64); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, MVT::v2i64); + setOperationAction(ISD::SELECT, VT, Promote); + AddPromotedToType (ISD::SELECT, VT, MVT::v2i64); } setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -775,6 +784,114 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSETCC, MVT::v2i64, Custom); } + if (!UseSoftFloat && Subtarget->hasAVX()) { + addRegisterClass(MVT::v8f32, X86::VR256RegisterClass); + addRegisterClass(MVT::v4f64, X86::VR256RegisterClass); + addRegisterClass(MVT::v8i32, X86::VR256RegisterClass); + addRegisterClass(MVT::v4i64, X86::VR256RegisterClass); + + setOperationAction(ISD::LOAD, MVT::v8f32, Legal); + setOperationAction(ISD::LOAD, MVT::v8i32, Legal); + setOperationAction(ISD::LOAD, MVT::v4f64, Legal); + setOperationAction(ISD::LOAD, MVT::v4i64, Legal); + setOperationAction(ISD::FADD, MVT::v8f32, Legal); + setOperationAction(ISD::FSUB, MVT::v8f32, Legal); + setOperationAction(ISD::FMUL, MVT::v8f32, Legal); + setOperationAction(ISD::FDIV, MVT::v8f32, Legal); + setOperationAction(ISD::FSQRT, MVT::v8f32, Legal); + setOperationAction(ISD::FNEG, MVT::v8f32, Custom); + //setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom); + //setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Custom); + //setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom); + //setOperationAction(ISD::SELECT, MVT::v8f32, Custom); + //setOperationAction(ISD::VSETCC, MVT::v8f32, Custom); + + // Operations to consider commented out -v16i16 v32i8 + //setOperationAction(ISD::ADD, MVT::v16i16, Legal); + setOperationAction(ISD::ADD, MVT::v8i32, Custom); + setOperationAction(ISD::ADD, MVT::v4i64, Custom); + //setOperationAction(ISD::SUB, MVT::v32i8, Legal); + //setOperationAction(ISD::SUB, MVT::v16i16, Legal); + setOperationAction(ISD::SUB, MVT::v8i32, Custom); + setOperationAction(ISD::SUB, MVT::v4i64, Custom); + //setOperationAction(ISD::MUL, MVT::v16i16, Legal); + setOperationAction(ISD::FADD, MVT::v4f64, Legal); + setOperationAction(ISD::FSUB, MVT::v4f64, Legal); + setOperationAction(ISD::FMUL, MVT::v4f64, Legal); + setOperationAction(ISD::FDIV, MVT::v4f64, Legal); + setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); + setOperationAction(ISD::FNEG, MVT::v4f64, Custom); + + setOperationAction(ISD::VSETCC, MVT::v4f64, Custom); + // setOperationAction(ISD::VSETCC, MVT::v32i8, Custom); + // setOperationAction(ISD::VSETCC, MVT::v16i16, Custom); + setOperationAction(ISD::VSETCC, MVT::v8i32, Custom); + + // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i8, Custom); + // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i16, Custom); + // setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f32, Custom); + + setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i64, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f64, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i64, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f64, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Custom); + +#if 0 + // Not sure we want to do this since there are no 256-bit integer + // operations in AVX + + // Custom lower build_vector, vector_shuffle, and extract_vector_elt. + // This includes 256-bit vectors + for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; ++i) { + MVT VT = (MVT::SimpleValueType)i; + + // Do not attempt to custom lower non-power-of-2 vectors + if (!isPowerOf2_32(VT.getVectorNumElements())) + continue; + + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + } + + if (Subtarget->is64Bit()) { + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i64, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Custom); + } +#endif + +#if 0 + // Not sure we want to do this since there are no 256-bit integer + // operations in AVX + + // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64. + // Including 256-bit vectors + for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; i++) { + MVT VT = (MVT::SimpleValueType)i; + + if (!VT.is256BitVector()) { + continue; + } + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, MVT::v4i64); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, MVT::v4i64); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, MVT::v4i64); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, MVT::v4i64); + setOperationAction(ISD::SELECT, VT, Promote); + AddPromotedToType (ISD::SELECT, VT, MVT::v4i64); + } + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); +#endif + } + // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -805,6 +922,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::STORE); + setTargetDAGCombine(ISD::MEMBARRIER); if (Subtarget->is64Bit()) setTargetDAGCombine(ISD::MUL); @@ -909,6 +1027,11 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, return Table; } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { + return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 4; +} + //===----------------------------------------------------------------------===// // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -5690,7 +5813,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, Args.push_back(Entry); std::pair<SDValue,SDValue> CallResult = LowerCallTo(Chain, Type::VoidTy, false, false, false, false, - CallingConv::C, false, + 0, CallingConv::C, false, DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl); return CallResult.second; } @@ -8454,6 +8577,58 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +// On X86 and X86-64, atomic operations are lowered to locked instructions. +// Locked instructions, in turn, have implicit fence semantics (all memory +// operations are flushed before issuing the locked instruction, and the +// are not buffered), so we can fold away the common pattern of +// fence-atomic-fence. +static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) { + SDValue atomic = N->getOperand(0); + switch (atomic.getOpcode()) { + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + break; + default: + return SDValue(); + } + + SDValue fence = atomic.getOperand(0); + if (fence.getOpcode() != ISD::MEMBARRIER) + return SDValue(); + + switch (atomic.getOpcode()) { + case ISD::ATOMIC_CMP_SWAP: + return DAG.UpdateNodeOperands(atomic, fence.getOperand(0), + atomic.getOperand(1), atomic.getOperand(2), + atomic.getOperand(3)); + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + return DAG.UpdateNodeOperands(atomic, fence.getOperand(0), + atomic.getOperand(1), atomic.getOperand(2)); + default: + return SDValue(); + } +} + SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -8472,6 +8647,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FAND: return PerformFANDCombine(N, DAG); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); + case ISD::MEMBARRIER: return PerformMEMBARRIERCombine(N, DAG); } return SDValue(); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index fb4eb68..ffed46c 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -380,7 +380,7 @@ namespace llvm { MVT getOptimalMemOpType(uint64_t Size, unsigned Align, bool isSrcConst, bool isSrcStr, SelectionDAG &DAG) const; - + /// LowerOperation - Provide custom lowering hooks for some operations. /// virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); @@ -533,7 +533,10 @@ namespace llvm { , SmallSet<Instruction*, 8> & #endif ); - + + /// getFunctionAlignment - Return the Log2 alignment of this function. + virtual unsigned getFunctionAlignment(const Function *F) const; + private: /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index b50dd65..6359542 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -49,8 +49,10 @@ struct X86AddressMode { unsigned IndexReg; unsigned Disp; GlobalValue *GV; + unsigned GVOpFlags; - X86AddressMode() : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0) { + X86AddressMode() + : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) { Base.Reg = 0; } }; @@ -113,7 +115,7 @@ inline const MachineInstrBuilder &addLeaAddress(const MachineInstrBuilder &MIB, assert (0); MIB.addImm(AM.Scale).addReg(AM.IndexReg); if (AM.GV) - return MIB.addGlobalAddress(AM.GV, AM.Disp); + return MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags); else return MIB.addImm(AM.Disp); } diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 21f71ec..e5d84c5 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2459,7 +2459,8 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, getDefRegState(MO.isDef()) | RegState::Implicit | getKillRegState(MO.isKill()) | - getDeadRegState(MO.isDead())); + getDeadRegState(MO.isDead()) | + getUndefRegState(MO.isUndef())); } // Change CMP32ri r, 0 back to TEST32rr r, r, etc. unsigned NewOpc = 0; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index a6b0880..03df10d 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -180,10 +180,12 @@ def i16mem : X86MemOperand<"printi16mem">; def i32mem : X86MemOperand<"printi32mem">; def i64mem : X86MemOperand<"printi64mem">; def i128mem : X86MemOperand<"printi128mem">; +def i256mem : X86MemOperand<"printi256mem">; def f32mem : X86MemOperand<"printf32mem">; def f64mem : X86MemOperand<"printf64mem">; def f80mem : X86MemOperand<"printf80mem">; def f128mem : X86MemOperand<"printf128mem">; +def f256mem : X86MemOperand<"printf256mem">; // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of // plain GR64, so that it doesn't potentially require a REX prefix. diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 996baa0..2e6f017 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -157,6 +157,24 @@ let Namespace = "X86" in { def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>; def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>; + // YMM Registers, used by AVX instructions + def YMM0: Register<"ymm0">, DwarfRegNum<[17, 21, 21]>; + def YMM1: Register<"ymm1">, DwarfRegNum<[18, 22, 22]>; + def YMM2: Register<"ymm2">, DwarfRegNum<[19, 23, 23]>; + def YMM3: Register<"ymm3">, DwarfRegNum<[20, 24, 24]>; + def YMM4: Register<"ymm4">, DwarfRegNum<[21, 25, 25]>; + def YMM5: Register<"ymm5">, DwarfRegNum<[22, 26, 26]>; + def YMM6: Register<"ymm6">, DwarfRegNum<[23, 27, 27]>; + def YMM7: Register<"ymm7">, DwarfRegNum<[24, 28, 28]>; + def YMM8: Register<"ymm8">, DwarfRegNum<[25, -2, -2]>; + def YMM9: Register<"ymm9">, DwarfRegNum<[26, -2, -2]>; + def YMM10: Register<"ymm10">, DwarfRegNum<[27, -2, -2]>; + def YMM11: Register<"ymm11">, DwarfRegNum<[28, -2, -2]>; + def YMM12: Register<"ymm12">, DwarfRegNum<[29, -2, -2]>; + def YMM13: Register<"ymm13">, DwarfRegNum<[30, -2, -2]>; + def YMM14: Register<"ymm14">, DwarfRegNum<[31, -2, -2]>; + def YMM15: Register<"ymm15">, DwarfRegNum<[32, -2, -2]>; + // Floating point stack registers def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>; def ST1 : Register<"st(1)">, DwarfRegNum<[34, 13, 12]>; @@ -229,6 +247,11 @@ def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>; +def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, + YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; + //===----------------------------------------------------------------------===// // Register Class Definitions... now that we have all of the pieces, define the // top-level register classes. The order specified in the register list is @@ -755,6 +778,10 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, } }]; } +def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256, + [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, + YMM8, YMM9, YMM10, YMM11, + YMM12, YMM13, YMM14, YMM15]>; // Status flags registers. def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> { diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index f4f6cce..0d1434f 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -142,7 +142,7 @@ public: bool hasSSE4A() const { return HasSSE4A; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } - bool hasAVX() const { return hasAVX(); } + bool hasAVX() const { return HasAVX; } bool hasFMA3() const { return HasFMA3; } bool hasFMA4() const { return HasFMA4; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 67dcd01..b000914 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -226,7 +226,7 @@ bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM, assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + PM.add(AsmPrinterCtor(Out, *this, Verbose)); return false; } @@ -254,7 +254,7 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -284,7 +284,7 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -298,7 +298,7 @@ bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -312,7 +312,7 @@ bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index ba73ca8..90a5cc2 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -45,7 +45,6 @@ protected: // set this functions to ctor pointer at startup time if they are linked in. typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, X86TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); static AsmPrinterCtorFn AsmPrinterCtor; diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h index 5722b87..d95aab3 100644 --- a/lib/Target/XCore/XCore.h +++ b/lib/Target/XCore/XCore.h @@ -26,7 +26,6 @@ namespace llvm { FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM); FunctionPass *createXCoreCodePrinterPass(raw_ostream &OS, XCoreTargetMachine &TM, - CodeGenOpt::Level OptLevel, bool Verbose); } // end namespace llvm; diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index 4ab5d75..67cb0c8 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -58,9 +58,8 @@ namespace { const XCoreSubtarget &Subtarget; public: explicit XCoreAsmPrinter(raw_ostream &O, XCoreTargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V), DW(0), + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V), DW(0), Subtarget(*TM.getSubtargetImpl()) {} virtual const char *getPassName() const { @@ -106,9 +105,8 @@ namespace { /// FunctionPass *llvm::createXCoreCodePrinterPass(raw_ostream &o, XCoreTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { - return new XCoreAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); + return new XCoreAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } // PrintEscapedString - Print each character of the specified string, escaping @@ -277,7 +275,7 @@ emitFunctionStart(MachineFunction &MF) break; } // (1 << 1) byte aligned - EmitAlignment(1, F, 1); + EmitAlignment(MF.getAlignment(), F, 1); if (TAI->hasDotTypeDotSizeDirective()) { O << "\t.type " << CurrentFnName << ",@function\n"; } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 93c5f59..cc11d32 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -187,6 +187,12 @@ void XCoreTargetLowering::ReplaceNodeResults(SDNode *N, } } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned XCoreTargetLowering:: +getFunctionAlignment(const Function *) const { + return 1; +} + //===----------------------------------------------------------------------===// // Misc Lower Operation implementation //===----------------------------------------------------------------------===// diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 993ecbd..753ea81 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -84,6 +84,9 @@ namespace llvm { virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const; + /// getFunctionAlignment - Return the Log2 alignment of this function. + virtual unsigned getFunctionAlignment(const Function *F) const; + private: const XCoreTargetMachine &TM; const XCoreSubtarget &Subtarget; diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 09227d9..b72225f 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -69,6 +69,6 @@ bool XCoreTargetMachine::addAssemblyEmitter(PassManagerBase &PM, bool Verbose, raw_ostream &Out) { // Output assembly language. - PM.add(createXCoreCodePrinterPass(Out, *this, OptLevel, Verbose)); + PM.add(createXCoreCodePrinterPass(Out, *this, Verbose)); return false; } |