Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
31 files changed, 1168 insertions, 533 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td index 6838084..2d747091 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.td +++ b/contrib/llvm/lib/Target/ARM/ARM.td @@ -59,6 +59,8 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", "FP compare + branch is slow">; def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", "Floating point unit supports single precision only">; +def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", + "Enable support for TrustZone security extensions">; // Some processors have FP multiply-accumulate instructions that don't // play nicely with other VFP / NEON instructions, and it's generally better @@ -144,29 +146,33 @@ include "ARMSchedule.td" def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5", "Cortex-A5 ARM processors", [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, - FeatureVMLxForwarding, FeatureT2XtPk]>; + FeatureVMLxForwarding, FeatureT2XtPk, + FeatureTrustZone]>; def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", "Cortex-A8 ARM processors", [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, - FeatureVMLxForwarding, FeatureT2XtPk]>; + FeatureVMLxForwarding, FeatureT2XtPk, + FeatureTrustZone]>; def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", "Cortex-A9 ARM processors", [FeatureVMLxForwarding, FeatureT2XtPk, FeatureFP16, - FeatureAvoidPartialCPSR]>; + FeatureAvoidPartialCPSR, + FeatureTrustZone]>; def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", "Swift ARM processors", [FeatureNEONForFP, FeatureT2XtPk, FeatureVFP4, FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureAvoidMOVsShOp, - FeatureHasSlowFPVMLx]>; + FeatureHasSlowFPVMLx, FeatureTrustZone]>; // FIXME: It has not been determined if A15 has these features. def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", "Cortex-A15 ARM processors", [FeatureT2XtPk, FeatureFP16, - FeatureAvoidPartialCPSR]>; + FeatureAvoidPartialCPSR, + FeatureTrustZone]>; def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", "Cortex-R5 ARM processors", [FeatureSlowFPBrcc, FeatureHWDivARM, diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 9e68ff4..6005054 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -283,14 +283,20 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, return false; --I; } - if (!isUnpredicatedTerminator(I)) - return false; // Get the last instruction in the block. MachineInstr *LastInst = I; + unsigned LastOpc = LastInst->getOpcode(); + + // Check if it's an indirect branch first; this should return 'unanalyzable' + // even if it's predicated. + if (isIndirectBranchOpcode(LastOpc)) + return true; + + if (!isUnpredicatedTerminator(I)) + return false; // If there is only one terminator instruction, process it.
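The AnalyzeBranch hunk above hoists the indirect-branch test ahead of the isUnpredicatedTerminator check, so a predicated indirect branch is now reported as unanalyzable (return true) instead of being treated as an analyzable block ending. A minimal standalone sketch of that control flow, using hypothetical stand-in types rather than the real MachineInstr API:

    #include <cassert>

    // Hypothetical stand-ins for MachineInstr and the opcode queries;
    // not the real LLVM API.
    enum class Kind { IndirectBranch, CondBranch, UncondBranch, Other };
    struct Instr {
      Kind kind;
      bool predicated;
    };

    // Models the reordered checks: "true" means "cannot analyze".
    bool analyzeBranchLike(const Instr &last) {
      // Indirect branches are unanalyzable even when predicated, so this
      // test must come before the predication check below.
      if (last.kind == Kind::IndirectBranch)
        return true;
      // A predicated terminator stops the analysis without failing it.
      if (last.predicated)
        return false;
      // ... direct conditional/unconditional branch handling follows ...
      return false;
    }

    int main() {
      // Before the reordering, a predicated indirect branch took the
      // second branch and was wrongly considered analyzed.
      assert(analyzeBranchLike({Kind::IndirectBranch, true}));
      assert(!analyzeBranchLike({Kind::CondBranch, true}));
    }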
- unsigned LastOpc = LastInst->getOpcode(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { if (isUncondBranchOpcode(LastOpc)) { TBB = LastInst->getOperand(0).getMBB(); @@ -747,10 +753,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Mov->addRegisterKilled(SrcReg, TRI); } -static const -MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, - unsigned Reg, unsigned SubIdx, unsigned State, - const TargetRegisterInfo *TRI) { +const MachineInstrBuilder & +ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, + unsigned SubIdx, unsigned State, + const TargetRegisterInfo *TRI) const { if (!SubIdx) return MIB.addReg(Reg, State); @@ -795,12 +801,22 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA)) - .addFrameIndex(FI)) - .addMemOperand(MMO); - MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); - AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + if (Subtarget.hasV5TEOps()) { + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD)); + AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); + AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO); + + AddDefaultPred(MIB); + } else { + // Fall back to the STM instruction, which has existed since the dawn of + // time. + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA)) + .addFrameIndex(FI).addMemOperand(MMO)); + AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); + AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + } } else llvm_unreachable("Unknown reg class!"); break; @@ -948,7 +964,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); MachineMemOperand *MMO = @@ -975,12 +990,24 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { - unsigned LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA : ARM::LDMIA; - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); - MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + MachineInstrBuilder MIB; + + if (Subtarget.hasV5TEOps()) { + MIB = BuildMI(MBB, I, DL, get(ARM::LDRD)); + AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); + AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO); + + AddDefaultPred(MIB); + } else { + // Fall back to the LDM instruction, which has existed since the dawn of + // time.
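The spill/reload hunks above prefer the 64-bit STRD/LDRD pair for GPR-pair register classes on subtargets with ARMv5TE ops, keeping STM/LDM only as the pre-v5TE fallback; unlike LDM/STM, LDRD/STRD encode a base-plus-immediate address, which is why the new path adds the extra register and immediate operands. A simplified model of the opcode choice (the helper is illustrative, not LLVM API):

    #include <cstdio>

    // Pick the opcode used to spill or reload a GPR register pair.
    // ARMv5TE adds 64-bit LDRD/STRD, which take a base register plus
    // immediate offset; older cores must use LDM/STM, which cannot
    // encode an offset and address the slot via the base alone.
    const char *pairSpillOpcode(bool hasV5TEOps, bool isStore) {
      if (hasV5TEOps)
        return isStore ? "STRD" : "LDRD";
      return isStore ? "STMIA" : "LDMIA";
    }

    int main() {
      std::printf("v5TE store: %s\n", pairSpillOpcode(true, true));   // STRD
      std::printf("v4T reload: %s\n", pairSpillOpcode(false, false)); // LDMIA
    }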
+ MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA)) + .addFrameIndex(FI).addMemOperand(MMO)); + MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + } + if (TargetRegisterInfo::isPhysicalRegister(DestReg)) MIB.addReg(DestReg, RegState::ImplicitDefine); } else diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 7c107bb..2ef659c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -141,6 +141,10 @@ public: MachineInstr *commuteInstruction(MachineInstr*, bool=false) const; + const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg, + unsigned SubIdx, unsigned State, + const TargetRegisterInfo *TRI) const; + virtual bool produceSameValue(const MachineInstr *MI0, const MachineInstr *MI1, const MachineRegisterInfo *MRI) const; diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index b6b27f8..b0d34a7 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -75,6 +75,12 @@ ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const { } const uint32_t* +ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { + return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) + ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; +} + +const uint32_t* ARMBaseRegisterInfo::getNoPreservedMask() const { return CSR_NoRegs_RegMask; } diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index 725033b..0679919 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -96,6 +96,7 @@ public: /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID) const; + const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; const uint32_t *getNoPreservedMask() const; BitVector getReservedRegs(const MachineFunction &MF) const; diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h index e6e8c3d..4f94ad2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h @@ -74,9 +74,15 @@ static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 }; static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 }; static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 }; + static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2); if (Reg == 0) { + + // If only R3 was left unallocated, we still must waste it now. + Reg = State.AllocateReg(GPRArgRegs, 4); + assert((!Reg || Reg == ARM::R3) && "Wrong GPR usage for f64"); + // For the 2nd half of a v2f64, do not just fail.
if (CanFail) return false; diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td index b378b96..8ff666e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td @@ -111,8 +111,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[ // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register // (and the same is true for f64 if VFP is not enabled) CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>, - CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&" - "ArgFlags.getOrigAlign() != 8", + CCIfType<[i32], CCIf<"ArgFlags.getOrigAlign() != 8", CCAssignToReg<[R0, R1, R2, R3]>>>, CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>, @@ -195,10 +194,21 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>; def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, (sequence "D%u", 15, 8))>; +// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this' +// and the pointer return value are both passed in R0 in these cases, this can +// be partially modelled by treating R0 as a callee-saved register +// Only the resulting RegMask is used; the SaveList is ignored +def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, + R5, R4, (sequence "D%u", 15, 8), + R0)>; + // iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register. // Also save R7-R4 first to match the stack frame fixed spill areas. def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; +def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, + (sub CSR_AAPCS_ThisReturn, R9))>; + // GHC set of callee saved regs is empty as all those regs are // used for passing STG regs around // add is a workaround for not being able to compile empty list: diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index 29fcd40..5d45f64 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -144,8 +144,8 @@ class ARMFastISel : public FastISel { virtual bool TargetSelectInstruction(const Instruction *I); virtual unsigned TargetMaterializeConstant(const Constant *C); virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); - virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, - const LoadInst *LI); + virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI); virtual bool FastLowerArguments(); private: #include "ARMGenFastISel.inc" @@ -2605,7 +2605,7 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned Opc; bool isBoolZext = false; - const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32); + const TargetRegisterClass *RC; switch (SrcVT.SimpleTy) { default: return 0; case MVT::i16: @@ -2797,12 +2797,12 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { return false; } -/// TryToFoldLoad - The specified machine instr operand is a vreg, and that +/// \brief The specified machine instr operand is a vreg, and that /// vreg is being provided by the specified load instruction. If possible, /// try to fold the load as an operand to the instruction, returning true if /// successful. -bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, - const LoadInst *LI) { +bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) { // Verify we have a legal type before going any further. 
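The CSR_*_ThisReturn lists above exploit an ARM C++ ABI rule: constructors and destructors return 'this', so for such calls R0 can be modelled as preserved and the caller may keep using the value it already holds. A self-contained illustration of why forwarding the argument (as LowerCallResult's ThisVal does) is sound under that assumption; this is ordinary C++, not LLVM code:

    #include <cassert>

    // A constructor-like callee that returns its first argument, as the
    // ARM C++ ABI requires of constructors ('this' is passed in R0 and
    // the result comes back in R0).
    struct Obj { int field; };

    Obj *initLike(Obj *self) {
      self->field = 42;
      return self; // return value and 'this' share R0
    }

    int main() {
      Obj o{0};
      // Because the result is guaranteed to equal the argument, a caller
      // that still holds &o need not read R0 back after the call; that is
      // what treating R0 as callee-saved for this call models.
      Obj *p = initLike(&o);
      assert(p == &o && o.field == 42);
    }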
MVT VT; if (!isLoadTypeLegal(LI->getType(), VT)) diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 7a02adf..483802b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -141,7 +141,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { assert(!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -159,8 +159,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { return; // Allocate the vararg register save area. This is not counted in NumBytes. - if (VARegSaveSize) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize, + if (ArgRegsSaveSize) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, MachineInstr::FrameSetup); if (!AFI->hasStackFrame()) { @@ -357,7 +357,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); @@ -471,8 +471,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MBBI = NewMI; } - if (VARegSaveSize) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); + if (ArgRegsSaveSize) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize); } /// getFrameIndexReference - Provide a base+offset reference to an FI slot for @@ -1003,7 +1003,7 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; + bool isVarArg = AFI->getArgRegsSaveSize() > 0; unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs(); // The emitPopInst calls below do not insert reloads for the aligned DPRCS2 @@ -1174,7 +1174,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (AFI->isThumb1OnlyFunction()) { // Spill LR if Thumb1 function uses variable length argument lists. - if (AFI->getVarArgsRegSaveSize() > 0) + if (AFI->getArgRegsSaveSize() > 0) MRI.setPhysRegUsed(ARM::LR); // Spill R4 if Thumb1 epilogue has to restore SP from FP. 
We don't know diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 2c51de2..9e1782e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1469,14 +1469,14 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { SDValue Ops[]= { Base, AMOpc, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, - MVT::i32, MVT::Other, Ops, 5); + MVT::i32, MVT::Other, Ops); } else { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, - MVT::i32, MVT::Other, Ops, 6); + MVT::i32, MVT::Other, Ops); } } @@ -1525,7 +1525,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { SDValue Ops[]= { Base, Offset, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32, - MVT::Other, Ops, 5); + MVT::Other, Ops); } return NULL; @@ -1539,7 +1539,7 @@ SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// \brief Form a D register from a pair of S registers. @@ -1550,7 +1550,7 @@ SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// \brief Form a quad register from a pair of D registers. @@ -1560,7 +1560,7 @@ SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// \brief Form 4 consecutive D registers from a pair of Q registers. @@ -1570,7 +1570,7 @@ SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// \brief Form 4 consecutive S registers. 
@@ -1585,7 +1585,7 @@ SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// \brief Form 4 consecutive D registers. @@ -1599,7 +1599,7 @@ SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// \brief Form 4 consecutive Q registers. @@ -1613,7 +1613,7 @@ SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand @@ -1761,7 +1761,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Pred); Ops.push_back(Reg0); Ops.push_back(Chain); - VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); } else { // Otherwise, quad registers are loaded with two separate instructions, @@ -1774,7 +1774,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, - ResTy, AddrTy, MVT::Other, OpsA, 7); + ResTy, AddrTy, MVT::Other, OpsA); Chain = SDValue(VLdA, 2); // Load the odd subregs. @@ -1791,8 +1791,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Pred); Ops.push_back(Reg0); Ops.push_back(Chain); - VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, - Ops.data(), Ops.size()); + VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); } // Transfer memoperands. @@ -1913,8 +1912,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Pred); Ops.push_back(Reg0); Ops.push_back(Chain); - SDNode *VSt = - CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); // Transfer memoperands. 
cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1); @@ -1939,7 +1937,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, MemAddr.getValueType(), - MVT::Other, OpsA, 7); + MVT::Other, OpsA); cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1); Chain = SDValue(VStA, 1); @@ -1958,7 +1956,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Reg0); Ops.push_back(Chain); SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, - Ops.data(), Ops.size()); + Ops); cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1); return VStB; } @@ -2063,8 +2061,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : QOpcodes[OpcodeIndex]); - SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, - Ops.data(), Ops.size()); + SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1); if (!IsLoad) return VLdLn; @@ -2150,8 +2147,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, if (isUpdating) ResTys.push_back(MVT::i32); ResTys.push_back(MVT::Other); - SDNode *VLdDup = - CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1); SuperReg = SDValue(VLdDup, 0); @@ -2197,7 +2193,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, Ops.push_back(N->getOperand(FirstTblReg + NumVecs)); Ops.push_back(getAL(CurDAG)); // predicate Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register - return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size()); + return CurDAG->getMachineNode(Opc, dl, VT, Ops); } SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, @@ -2542,7 +2538,7 @@ SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), MVT::i32, MVT::i32, MVT::Other, - Ops.data() ,Ops.size()); + Ops); cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1); return ResNode; } @@ -2599,7 +2595,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, - Ops, 4); + Ops); } else { SDValue Ops[] = { CPIdx, @@ -2609,7 +2605,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { CurDAG->getEntryNode() }; ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, - Ops, 5); + Ops); } ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0)); return NULL; @@ -2719,7 +2715,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { MVT::i32); SDValue Ops[] = { N0.getOperand(0), Imm16, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(Opc, dl, VT, Ops, 4); + return CurDAG->getMachineNode(Opc, dl, VT, Ops); } } break; @@ -2733,16 +2729,15 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { break; if (Subtarget->isThumb()) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), - CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, 
MVT::i32,Ops,4); + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops); } else { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? ARM::UMULL : ARM::UMULLv5, - dl, MVT::i32, MVT::i32, Ops, 5); + dl, MVT::i32, MVT::i32, Ops); } } case ISD::SMUL_LOHI: { @@ -2751,14 +2746,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { if (Subtarget->isThumb()) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4); + return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops); } else { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, - dl, MVT::i32, MVT::i32, Ops, 5); + dl, MVT::i32, MVT::i32, Ops); } } case ARMISD::UMLAL:{ @@ -2766,7 +2761,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32)}; - return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops, 6); + return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops); }else{ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG), @@ -2774,7 +2769,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, - dl, MVT::i32, MVT::i32, Ops, 7); + dl, MVT::i32, MVT::i32, Ops); } } case ARMISD::SMLAL:{ @@ -2782,7 +2777,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32)}; - return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops, 6); + return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops); }else{ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG), @@ -2790,7 +2785,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? 
ARM::SMLAL : ARM::SMLALv5, - dl, MVT::i32, MVT::i32, Ops, 7); + dl, MVT::i32, MVT::i32, Ops); } } case ISD::LOAD: { @@ -2833,7 +2828,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { MVT::i32); SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, - MVT::Glue, Ops, 5); + MVT::Glue, Ops); Chain = SDValue(ResNode, 0); if (N->getNumValues() == 2) { InFlag = SDValue(ResNode, 1); @@ -2863,7 +2858,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; - return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); + return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops); } case ARMISD::VUZP: { unsigned Opc = 0; @@ -2883,7 +2878,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; - return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); + return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops); } case ARMISD::VTRN: { unsigned Opc = 0; @@ -2902,7 +2897,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; - return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); + return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops); } case ARMISD::BUILD_VECTOR: { EVT VecVT = N->getValueType(0); @@ -3147,8 +3142,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { Ops.push_back(getAL(CurDAG)); Ops.push_back(CurDAG->getRegister(0, MVT::i32)); Ops.push_back(Chain); - SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(), - Ops.size()); + SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); // Transfer memoperands. MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); @@ -3211,8 +3205,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD; - SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(), - Ops.size()); + SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); // Transfer memoperands. 
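The long run of "Ops, N" to "Ops" changes in this file tracks a SelectionDAG API change: the getMachineNode overloads now take the operand list as an ArrayRef<SDValue>, so the element count is deduced from the array instead of being passed by hand. A minimal stand-in (names are illustrative) showing the mechanism:

    #include <cassert>
    #include <cstddef>

    // Minimal stand-in for llvm::ArrayRef, enough to show why the explicit
    // operand counts could be dropped: a C array converts implicitly and
    // carries its own length.
    template <typename T> class ArrayRefLike {
      const T *Data;
      std::size_t Length;
    public:
      template <std::size_t N>
      ArrayRefLike(const T (&Arr)[N]) : Data(Arr), Length(N) {}
      const T *data() const { return Data; }
      std::size_t size() const { return Length; }
    };

    static std::size_t numOperands(ArrayRefLike<int> Ops) { return Ops.size(); }

    int main() {
      int Ops[] = {1, 2, 3, 4, 5};
      // No separate count argument that can drift out of sync with Ops[].
      assert(numOperands(Ops) == 5);
    }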
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); @@ -3398,7 +3391,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { Ops.push_back(N->getOperand(1)); Ops.push_back(getAL(CurDAG)); // Predicate Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register - return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size()); + return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops); } case ARMISD::VTBL2: { DebugLoc dl = N->getDebugLoc(); @@ -3414,8 +3407,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { Ops.push_back(N->getOperand(2)); Ops.push_back(getAL(CurDAG)); // Predicate Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register - return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, - Ops.data(), Ops.size()); + return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops); } case ISD::CONCAT_VECTORS: diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index bb26090..e49cfc4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -729,7 +729,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) (Subtarget->hasV6Ops() && !Subtarget->isThumb())) { // membarrier needs custom lowering; the rest are legal and handled // normally. - setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); // Custom lowering for 64-bit ops setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); @@ -747,7 +746,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setInsertFencesForAtomic(true); } else { // Set them all for expansion, which will force libcalls. - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); @@ -765,8 +763,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // Unordered/Monotonic case. setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); - // Since the libcalls include locking, fold in the fences - setShouldFoldAtomicFences(true); } setOperationAction(ISD::PREFETCH, MVT::Other, Custom); @@ -1238,7 +1234,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { + SmallVectorImpl<SDValue> &InVals, + bool isThisReturn, SDValue ThisVal) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -1252,6 +1249,15 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign VA = RVLocs[i]; + // Pass 'this' value directly from the argument to return value, to avoid + // reg unit interference + if (i == 0 && isThisReturn) { + assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 && + "unexpected return calling convention register assignment"); + InVals.push_back(ThisVal); + continue; + } + SDValue Val; if (VA.needsCustom()) { // Handle f64 or half of a v2f64. @@ -1363,21 +1369,22 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); - bool IsStructRet = (Outs.empty()) ? 
false : Outs[0].Flags.isSRet(); - bool IsSibCall = false; + bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); + bool isThisReturn = false; + bool isSibCall = false; // Disable tail calls if they're not supported. if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) isTailCall = false; if (isTailCall) { // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, - isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), + isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG); // We don't support GuaranteedTailCallOpt for ARM, only automatically // detected sibcalls. if (isTailCall) { ++NumTailCalls; - IsSibCall = true; + isSibCall = true; } } @@ -1393,12 +1400,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned NumBytes = CCInfo.getNextStackOffset(); // For tail calls, memory operands are available in our caller's stack. - if (IsSibCall) + if (isSibCall) NumBytes = 0; // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - if (!IsSibCall) + if (!isSibCall) Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); @@ -1460,6 +1467,13 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, StackPtr, MemOpChains, Flags); } } else if (VA.isRegLoc()) { + if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) { + assert(VA.getLocVT() == MVT::i32 && + "unexpected calling convention register assignment"); + assert(!Ins.empty() && Ins[0].VT == MVT::i32 && + "unexpected use of 'returned'"); + isThisReturn = true; + } RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else if (isByVal) { assert(VA.isMemLoc()); // True if this byval aggregate will be split between registers // and memory. - if (CCInfo.isFirstByValRegValid()) { + unsigned ByValArgsCount = CCInfo.getInRegsParamsCount(); + unsigned CurByValIdx = CCInfo.getInRegsParamsProceed(); + + if (CurByValIdx < ByValArgsCount) { + + unsigned RegBegin, RegEnd; + CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); unsigned int i, j; - for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) { + for (i = 0, j = RegBegin; j < RegEnd; i++, j++) { SDValue Const = DAG.getConstant(4*i, MVT::i32); SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(j, Load)); } - offset = ARM::R4 - CCInfo.getFirstByValReg(); - CCInfo.clearFirstByValReg(); + + // If the parameter size exceeds the register area, the "offset" value + // helps us to calculate the stack slot for the remaining part properly.
+ offset = RegEnd - RegBegin; + + CCInfo.nextInRegsParam(); } - if (Flags.getByValSize() - 4*offset > 0) { + if (Flags.getByValSize() > 4*offset) { unsigned LocMemOffset = VA.getLocMemOffset(); SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset); SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, @@ -1499,7 +1524,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops, array_lengthof(Ops))); } - } else if (!IsSibCall) { + } else if (!isSibCall) { assert(VA.isMemLoc()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, @@ -1539,7 +1564,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } - InFlag =SDValue(); + InFlag = SDValue(); } // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every @@ -1680,8 +1705,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. + const uint32_t *Mask; const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI); + if (isThisReturn) + // For 'this' returns, use the R0-preserving mask + Mask = ARI->getThisReturnPreservedMask(CallConv); + else + Mask = ARI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -1703,8 +1735,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Handle result values, copying them out of physregs into vregs that we // return. - return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, - dl, DAG, InVals); + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, + InVals, isThisReturn, + isThisReturn ? OutVals[0] : SDValue()); } /// HandleByVal - Every parameter *after* a byval parameter is passed @@ -1718,8 +1751,24 @@ ARMTargetLowering::HandleByVal( assert((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && "unhandled ParmContext"); - if ((!State->isFirstByValRegValid()) && - (ARM::R0 <= reg) && (reg <= ARM::R3)) { + + // For in-prologue parameter handling, we also introduce a stack offset + // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal. + // This behaviour falls outside the AAPCS rules (5.5 Parameter Passing) for + // how the NSAA should be evaluated (NSAA means "next stacked argument address"). + // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs. + // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs.
+ unsigned NSAAOffset = State->getNextStackOffset(); + if (State->getCallOrPrologue() != Call) { + for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) { + unsigned RB, RE; + State->getInRegsParamInfo(i, RB, RE); + assert(NSAAOffset >= (RE-RB)*4 && + "Stack offset for byval regs no longer introduced?"); + NSAAOffset -= (RE-RB)*4; + } + } + if ((ARM::R0 <= reg) && (reg <= ARM::R3)) { if (Subtarget->isAAPCS_ABI() && Align > 4) { unsigned AlignInRegs = Align / 4; unsigned Waste = (ARM::R4 - reg) % AlignInRegs; @@ -1727,22 +1776,45 @@ ARMTargetLowering::HandleByVal( reg = State->AllocateReg(GPRArgRegs, 4); } if (reg != 0) { - State->setFirstByValReg(reg); + unsigned excess = 4 * (ARM::R4 - reg); + + // Special case when NSAA != SP and the parameter size is greater than + // the size of all remaining GPR regs. In that case we can't split the + // parameter; we must send it entirely to the stack. We also must set + // the NCRN to R4, wasting all remaining registers. + if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) { + while (State->AllocateReg(GPRArgRegs, 4)) + ; + return; + } + + // The first register for a byval parameter is the first register that + // wasn't allocated before this method call, so it is "reg". + // If the parameter is small enough to fit in the range [reg, r4), then + // the end (one past the last) register is reg + param-size-in-regs; + // otherwise the parameter is split between registers and stack, + // and the end register is r4 in that case. + unsigned ByValRegBegin = reg; + unsigned ByValRegEnd = (size < excess) ? reg + size/4 : ARM::R4; + State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); + // Note: the first register was already allocated at the beginning of + // this function; allocate the remaining registers we need. + for (unsigned i = reg+1; i != ByValRegEnd; ++i) + State->AllocateReg(GPRArgRegs, 4); // At a call site, a byval parameter that is split between // registers and memory needs its size truncated here. In a // function prologue, such byval parameters are reassembled in // memory, and are not truncated. if (State->getCallOrPrologue() == Call) { - unsigned excess = 4 * (ARM::R4 - reg); - assert(size >= excess && "expected larger existing stack allocation"); - size -= excess; + // Make the remaining size zero when the whole structure can be + // stored in registers. + if (size < excess) + size = 0; + else + size -= excess; } } } - // Confiscate any remaining parameter registers to preclude their - // assignment to subsequent parameters. - while (State->AllocateReg(GPRArgRegs, 4)) - ; } /// MatchingStackOffset - Return true if the given stack call argument is @@ -1874,7 +1946,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // local frame. const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction(). getInfo<ARMFunctionInfo>(); - if (AFI_Caller->getVarArgsRegSaveSize()) + if (AFI_Caller->getArgRegsSaveSize()) return false; // If the callee takes no arguments then go on to check the results of the @@ -2461,35 +2533,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, } } -static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget) { - DebugLoc dl = Op.getDebugLoc(); - if (!Subtarget->hasDataBarrier()) { - // Some ARMv6 cpus can support data barriers with an mcr instruction. - // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get - // here.
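Returning to the HandleByVal hunk above: it records which GPRs hold the leading bytes of a byval aggregate and, at call sites, truncates the size to the part that still lives on the stack. A simplified, self-contained model of that arithmetic, with registers numbered r0..r3 and 4 standing for r4 (one past the last argument register); the function names here are assumptions, not LLVM API:

    #include <cassert>
    #include <utility>
    #include <vector>

    struct ByValSplit {
      unsigned regBegin, regEnd; // [regBegin, regEnd) hold the prefix
      unsigned stackSize;        // bytes that remain on the stack
    };

    // Mirrors: excess = 4 * (R4 - reg); end = size < excess ? reg + size/4 : R4;
    // and the call-site truncation of "size".
    ByValSplit splitByVal(unsigned firstFreeReg, unsigned size) {
      unsigned excess = 4 * (4 - firstFreeReg); // bytes still free in regs
      ByValSplit s;
      s.regBegin = firstFreeReg;
      s.regEnd = (size < excess) ? firstFreeReg + size / 4 : 4;
      s.stackSize = (size < excess) ? 0 : size - excess;
      return s;
    }

    // Recovering the NSAA in the prologue, as in the loop above: subtract
    // 4 bytes for every register already recorded as holding byval data.
    unsigned nsaaOffset(unsigned nextStackOffset,
                        const std::vector<std::pair<unsigned, unsigned>> &inRegs) {
      for (const auto &r : inRegs) // (RegBegin, RegEnd) register ranges
        nextStackOffset -= (r.second - r.first) * 4;
      return nextStackOffset;
    }

    int main() {
      ByValSplit a = splitByVal(1, 12); // 12 bytes starting at r1: r1-r3, no stack
      assert(a.regBegin == 1 && a.regEnd == 4 && a.stackSize == 0);
      ByValSplit b = splitByVal(2, 24); // 24 bytes at r2: 8 in r2,r3, 16 on stack
      assert(b.regBegin == 2 && b.regEnd == 4 && b.stackSize == 16);
      unsigned nsaa = nsaaOffset(16, {{2, 4}}); // one byval held in r2,r3
      assert(nsaa == 8);
    }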
- assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && - "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); - return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), - DAG.getConstant(0, MVT::i32)); - } - - SDValue Op5 = Op.getOperand(5); - bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0; - unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); - bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0); - - ARM_MB::MemBOpt DMBOpt; - if (isDeviceBarrier) - DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY; - else - DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH; - return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), - DAG.getConstant(DMBOpt, MVT::i32)); -} - - static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { // FIXME: handle "fence singlethread" more efficiently. @@ -2586,12 +2629,16 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, void ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, - unsigned &VARegSize, unsigned &VARegSaveSize) + unsigned InRegsParamRecordIdx, + unsigned &ArgRegsSize, + unsigned &ArgRegsSaveSize) const { unsigned NumGPRs; - if (CCInfo.isFirstByValRegValid()) - NumGPRs = ARM::R4 - CCInfo.getFirstByValReg(); - else { + if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { + unsigned RBegin, REnd; + CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); + NumGPRs = REnd - RBegin; + } else { unsigned int firstUnalloced; firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, sizeof(GPRArgRegs) / @@ -2600,8 +2647,8 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, } unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); - VARegSize = NumGPRs * 4; - VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); + ArgRegsSize = NumGPRs * 4; + ArgRegsSaveSize = (ArgRegsSize + Align - 1) & ~(Align - 1); } // The remaining GPRs hold either the beginning of variable-argument // data, or the beginning of an aggregate passed by value (usually // byval). Either way, we allocate stack slots adjacent to the data // provided by our caller, and store the unallocated registers there. // If this is a variadic function, the va_list pointer will begin with // these values; otherwise, this reassembles a (byval) structure that // was split between registers and memory. -void -ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, - DebugLoc dl, SDValue &Chain, - const Value *OrigArg, - unsigned OffsetFromOrigArg, - unsigned ArgOffset, - bool ForceMutable) const { +// Return: The frame index registers were stored into. +int +ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc dl, SDValue &Chain, + const Value *OrigArg, + unsigned InRegsParamRecordIdx, + unsigned OffsetFromOrigArg, + unsigned ArgOffset, + bool ForceMutable) const { + + // Currently, two use cases are possible: + // Case #1. A non-var-args function, and we meet the first byval parameter. + // Set up the first unallocated register as the first byval register; + // eat all remaining registers + // (these two actions are performed by the HandleByVal method). + // Then, here, we initialize the stack frame with + // "store-reg" instructions. + // Case #2. A var-args function that doesn't contain byval parameters. + // The same: eat all remaining unallocated registers, + // initialize the stack frame.
+ MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned firstRegToSaveIndex; - if (CCInfo.isFirstByValRegValid()) - firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0; - else { + unsigned firstRegToSaveIndex, lastRegToSaveIndex; + unsigned RBegin, REnd; + if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { + CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); + firstRegToSaveIndex = RBegin - ARM::R0; + lastRegToSaveIndex = REnd - ARM::R0; + } else { firstRegToSaveIndex = CCInfo.getFirstUnallocated (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); + lastRegToSaveIndex = 4; } - unsigned VARegSize, VARegSaveSize; - computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); - if (VARegSaveSize) { - // If this function is vararg, store any remaining integer argument regs - // to their spots on the stack so that they may be loaded by deferencing - // the result of va_next. - AFI->setVarArgsRegSaveSize(VARegSaveSize); - AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize, - ArgOffset + VARegSaveSize - - VARegSize, - false)); - SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), - getPointerTy()); + unsigned ArgRegsSize, ArgRegsSaveSize; + computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgRegsSize, ArgRegsSaveSize); + + // Store any by-val regs to their spots on the stack so that they may be + // loaded by dereferencing the result of the formal parameter pointer or va_next. + // Note: once the stack area for byval/varargs registers + // has been initialized, it can't be initialized again. + if (ArgRegsSaveSize) { + + int FrameIndex = MFI->CreateFixedObject( + ArgRegsSaveSize, + ArgOffset + ArgRegsSaveSize - ArgRegsSize, + false); + SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); SmallVector<SDValue, 4> MemOps; - for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) { + for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex; + ++firstRegToSaveIndex, ++i) { const TargetRegisterClass *RC; if (AFI->isThumb1OnlyFunction()) RC = &ARM::tGPRRegClass; @@ -2661,13 +2728,37 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getConstant(4, getPointerTy())); } + + AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize()); + if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0], MemOps.size()); + return FrameIndex; } else // This will point to the next argument passed via stack. - AFI->setVarArgsFrameIndex( - MFI->CreateFixedObject(4, ArgOffset, !ForceMutable)); + return MFI->CreateFixedObject(4, ArgOffset, !ForceMutable); +} + +// Set up the stack frame that the va_list pointer will start from. +void +ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc dl, SDValue &Chain, + unsigned ArgOffset, + bool ForceMutable) const { + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + // Try to store any remaining integer argument regs + // to their spots on the stack so that they may be loaded by dereferencing + // the result of va_next. + // If there are no regs to be stored, just point the address past the last + // argument passed via the stack.
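computeRegArea above sizes the register save area as 4 bytes per GPR still to be stored, rounded up to the target stack alignment; the rounding slack is why StoreByValRegs creates its fixed object at ArgOffset + ArgRegsSaveSize - ArgRegsSize. A sketch of the computation:

    #include <cassert>

    // 4 bytes per GPR to be saved, rounded up to the stack alignment
    // (align must be a power of two, as stack alignments are).
    unsigned argRegsSaveSize(unsigned numGPRs, unsigned align) {
      unsigned argRegsSize = numGPRs * 4;
      return (argRegsSize + align - 1) & ~(align - 1);
    }

    int main() {
      assert(argRegsSaveSize(3, 8) == 16); // 12 bytes padded up to 16
      assert(argRegsSaveSize(4, 8) == 16); // already aligned
      assert(argRegsSaveSize(0, 8) == 0);  // nothing to save
    }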
+ int FrameIndex = + StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(), + 0, ArgOffset, ForceMutable); + + AFI->setVarArgsFrameIndex(FrameIndex); } SDValue @@ -2696,6 +2787,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, SDValue ArgValue; Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; + + // Initially ArgRegsSaveSize is zero. + // Then we increase this value each time we meet a byval parameter. + // We also increase this value in the case of a varargs function. + AFI->setArgRegsSaveSize(0); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx); @@ -2793,20 +2890,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Since they could be overwritten by lowering of arguments in case of // a tail call. if (Flags.isByVal()) { - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - if (!AFI->getVarArgsFrameIndex()) { - VarArgStyleRegisters(CCInfo, DAG, - dl, Chain, CurOrigArg, - Ins[VA.getValNo()].PartOffset, - VA.getLocMemOffset(), - true /*force mutable frames*/); - int VAFrameIndex = AFI->getVarArgsFrameIndex(); - InVals.push_back(DAG.getFrameIndex(VAFrameIndex, getPointerTy())); - } else { - int FI = MFI->CreateFixedObject(Flags.getByValSize(), - VA.getLocMemOffset(), false); - InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); - } + unsigned CurByValIndex = CCInfo.getInRegsParamsProceed(); + int FrameIndex = StoreByValRegs( + CCInfo, DAG, dl, Chain, CurOrigArg, + CurByValIndex, + Ins[VA.getValNo()].PartOffset, + VA.getLocMemOffset(), + true /*force mutable frames*/); + InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); + CCInfo.nextInRegsParam(); } else { int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, VA.getLocMemOffset(), true); @@ -2824,7 +2916,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // varargs if (isVarArg) - VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0, 0, + VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset()); return Chain; @@ -5165,6 +5257,23 @@ static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { return false; } +static EVT getExtensionTo64Bits(const EVT &OrigVT) { + if (OrigVT.getSizeInBits() >= 64) + return OrigVT; + + assert(OrigVT.isSimple() && "Expecting a simple value type"); + + MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; + switch (OrigSimpleTy) { + default: llvm_unreachable("Unexpected Vector Type"); + case MVT::v2i8: + case MVT::v2i16: + return MVT::v2i32; + case MVT::v4i8: + return MVT::v4i16; + } +} + /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL. /// We insert the required extension here to get the vector to fill a D register. @@ -5180,18 +5289,8 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, return N; // Must extend size to at least 64 bits to be used as an operand for VMULL.
- MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy; - EVT NewVT; - switch (OrigSimpleTy) { - default: llvm_unreachable("Unexpected Orig Vector Type"); - case MVT::v2i8: - case MVT::v2i16: - NewVT = MVT::v2i32; - break; - case MVT::v4i8: - NewVT = MVT::v4i16; - break; - } + EVT NewVT = getExtensionTo64Bits(OrigTy); + return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N); } @@ -5201,22 +5300,22 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, /// reach a total size of 64 bits. We have to add the extension separately /// because ARM does not have a sign/zero extending load for vectors. static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { - SDValue NonExtendingLoad = - DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(), + EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT()); + + // The load already has the right type. + if (ExtendedTy == LD->getMemoryVT()) + return DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), LD->getAlignment()); - unsigned ExtOp = 0; - switch (LD->getExtensionType()) { - default: llvm_unreachable("Unexpected LoadExtType"); - case ISD::EXTLOAD: - case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break; - } - MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy; - MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy; - return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG, - MemType, ExtType, ExtOp); + + // We need to create a zextload/sextload. We cannot just create a load + // followed by a zext/sext node because LowerMUL is also run during normal + // operation legalization where we can't create illegal types.
+ return DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), ExtendedTy, + LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), + LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); } /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, @@ -5614,7 +5713,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); - case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget); case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); case ISD::SINT_TO_FP: diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 9ee17f0..426010e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -464,7 +464,8 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; + SmallVectorImpl<SDValue> &InVals, + bool isThisReturn, SDValue ThisVal) const; virtual SDValue LowerFormalArguments(SDValue Chain, @@ -473,16 +474,23 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc dl, SDValue &Chain, + const Value *OrigArg, + unsigned InRegsParamRecordIdx, + unsigned OffsetFromOrigArg, + unsigned ArgOffset, + bool ForceMutable) const; + void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, DebugLoc dl, SDValue &Chain, - const Value *OrigArg, - unsigned OffsetFromOrigArg, unsigned ArgOffset, - bool ForceMutable = false) - const; + bool ForceMutable = false) const; void computeRegArea(CCState &CCInfo, MachineFunction &MF, - unsigned &VARegSize, unsigned &VARegSaveSize) const; + unsigned InRegsParamRecordIdx, + unsigned &ArgRegsSize, + unsigned &ArgRegsSaveSize) const; virtual SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index 11550c5..1bd174e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -221,6 +221,9 @@ def HasDB : Predicate<"Subtarget->hasDataBarrier()">, def HasMP : Predicate<"Subtarget->hasMPExtension()">, AssemblerPredicate<"FeatureMP", "mp-extensions">; +def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">, + AssemblerPredicate<"FeatureTrustZone", + "TrustZone">; def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; def IsThumb : Predicate<"Subtarget->isThumb()">, @@ -578,6 +581,17 @@ def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; } def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; } def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; } +/// imm0_4 predicate - Immediate in the range [0,4]. +def Imm0_4AsmOperand : ImmAsmOperand +{ + let Name = "Imm0_4"; + let DiagnosticType = "ImmRange0_4"; +} +def imm0_4 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 5; }]> { + let ParserMatchClass = Imm0_4AsmOperand; + let DecoderMethod = "DecodeImm0_4"; +} + /// imm0_7 predicate - Immediate in the range [0,7]. 
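The new imm0_4 operand above narrows HINT's immediate from eight bits to the architecturally defined hints 0-4 (NOP, YIELD, WFE, WFI and SEV), held in bits 2-0 of the encoding. A hypothetical model of the matching DecodeImm0_4 check (the real decoder hook has a different signature):

    #include <cassert>
    #include <cstdint>

    // Extract the 3-bit hint field and accept only the defined values;
    // 5..7 are rejected so they can fall through to other decodings.
    bool decodeImm0_4(std::uint32_t bits2to0, unsigned &imm) {
      imm = bits2to0 & 0x7; // bits 2-0 of the instruction word
      return imm <= 4;      // mirrors imm0_4's [0,4] range predicate
    }

    int main() {
      unsigned imm;
      assert(decodeImm0_4(2, imm) && imm == 2); // WFE
      assert(!decodeImm0_4(5, imm));            // reserved, rejected
    }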
def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; } def imm0_7 : Operand<i32>, ImmLeaf<i32, [{ @@ -741,18 +755,26 @@ def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }], // addrmode_imm12 := reg +/- imm12 // def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; } -def addrmode_imm12 : Operand<i32>, +class AddrMode_Imm12 : Operand<i32>, ComplexPattern<i32, 2, "SelectAddrModeImm12", []> { // 12-bit immediate operand. Note that instructions using this encode // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other // immediate values are as normal. let EncoderMethod = "getAddrModeImm12OpValue"; - let PrintMethod = "printAddrModeImm12Operand"; let DecoderMethod = "DecodeAddrModeImm12Operand"; let ParserMatchClass = MemImm12OffsetAsmOperand; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } + +def addrmode_imm12 : AddrMode_Imm12 { + let PrintMethod = "printAddrModeImm12Operand<false>"; +} + +def addrmode_imm12_pre : AddrMode_Imm12 { + let PrintMethod = "printAddrModeImm12Operand<true>"; +} + // ldst_so_reg := reg +/- reg shop imm // def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; } @@ -852,14 +874,23 @@ def am2offset_imm : Operand<i32>, // // FIXME: split into imm vs. reg versions. def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; } -def addrmode3 : Operand<i32>, - ComplexPattern<i32, 3, "SelectAddrMode3", []> { +class AddrMode3 : Operand<i32>, + ComplexPattern<i32, 3, "SelectAddrMode3", []> { let EncoderMethod = "getAddrMode3OpValue"; - let PrintMethod = "printAddrMode3Operand"; let ParserMatchClass = AddrMode3AsmOperand; let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); } +def addrmode3 : AddrMode3 +{ + let PrintMethod = "printAddrMode3Operand<false>"; +} + +def addrmode3_pre : AddrMode3 +{ + let PrintMethod = "printAddrMode3Operand<true>"; +} + // FIXME: split into imm vs. reg versions. // FIXME: parser method to handle +/- register. 
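For reference, the only difference between addrmode_imm12/addrmode3 and their new _pre twins above is the PrintMethod template argument. A stand-alone sketch of that idea (printImm12Offset is a hypothetical helper, not the actual ARMInstPrinter code, whose real form appears in the InstPrinter hunks further down): a compile-time AlwaysPrintImm0 flag keeps the #0 in pre-indexed syntax such as "ldr r0, [r1, #0]!", while the plain offset form still omits it.

    #include <iostream>

    // Sketch of the AlwaysPrintImm0 template parameter behind the _pre
    // operands: pre-indexed (writeback) operands instantiate it as true so
    // a zero offset is still printed explicitly.
    template <bool AlwaysPrintImm0>
    void printImm12Offset(std::ostream &OS, int OffImm) {
      if (OffImm < 0)
        OS << ", #-" << -OffImm;
      else if (AlwaysPrintImm0 || OffImm > 0)
        OS << ", #" << OffImm;
    }

    int main() {
      std::cout << "[r1";
      printImm12Offset<false>(std::cout, 0); // offset form: prints nothing
      std::cout << "] vs [r1";
      printImm12Offset<true>(std::cout, 0);  // pre-indexed form: ", #0"
      std::cout << "]!\n";
    }

The same pattern is reused for addrmode3 and addrmode5 below, so one printer implementation can serve both the offset and pre-indexed assembly syntaxes.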
def AM3OffsetAsmOperand : AsmOperandClass { @@ -885,15 +916,22 @@ def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> { // addrmode5 := reg +/- imm8*4 // def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; } -def addrmode5 : Operand<i32>, - ComplexPattern<i32, 2, "SelectAddrMode5", []> { - let PrintMethod = "printAddrMode5Operand"; +class AddrMode5 : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode5", []> { let EncoderMethod = "getAddrMode5OpValue"; let DecoderMethod = "DecodeAddrMode5Operand"; let ParserMatchClass = AddrMode5AsmOperand; let MIOperandInfo = (ops GPR:$base, i32imm); } +def addrmode5 : AddrMode5 { + let PrintMethod = "printAddrMode5Operand<false>"; +} + +def addrmode5_pre : AddrMode5 { + let PrintMethod = "printAddrMode5Operand<true>"; +} + // addrmode6 := reg with optional alignment // def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; } @@ -1668,11 +1706,11 @@ def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), NoItinerary, []>; } -def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary, +def HINT : AI<(outs), (ins imm0_4:$imm), MiscFrm, NoItinerary, "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> { - bits<8> imm; - let Inst{27-8} = 0b00110010000011110000; - let Inst{7-0} = imm; + bits<3> imm; + let Inst{27-3} = 0b0011001000001111000000000; + let Inst{2-0} = imm; } def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>; @@ -2077,7 +2115,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { // Secure Monitor Call is a system instruction. def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", - []> { + []>, Requires<[IsARM, HasTrustZone]> { bits<4> opt; let Inst{23-4} = 0b01100000000000000111; let Inst{3-0} = opt; @@ -2238,7 +2276,7 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2), multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass iii, InstrItinClass iir> { def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins addrmode_imm12:$addr), IndexModePre, LdFrm, iii, + (ins addrmode_imm12_pre:$addr), IndexModePre, LdFrm, iii, opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { bits<17> addr; let Inst{25} = 0; @@ -2275,6 +2313,7 @@ multiclass AI2_ldridx<bit isByte, string opc, let Inst{23} = offset{12}; let Inst{19-16} = addr; let Inst{11-0} = offset{11-0}; + let Inst{4} = 0; let DecoderMethod = "DecodeAddrMode2IdxInstruction"; } @@ -2307,7 +2346,7 @@ defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_iu, IIC_iLoad_bh_ru>; multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> { def _PRE : AI3ldstidx<op, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins addrmode3:$addr), IndexModePre, + (ins addrmode3_pre:$addr), IndexModePre, LdMiscFrm, itin, opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { bits<14> addr; @@ -2341,7 +2380,7 @@ defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>; defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>; let hasExtraDefRegAllocReq = 1 in { def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), - (ins addrmode3:$addr), IndexModePre, + (ins addrmode3_pre:$addr), IndexModePre, LdMiscFrm, IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $Rn_wb", []> { @@ -2497,7 +2536,7 @@ def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr), multiclass AI2_stridx<bit isByte, string opc, InstrItinClass iii, InstrItinClass iir> { def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb), - (ins GPR:$Rt, 
addrmode_imm12:$addr), IndexModePre, + (ins GPR:$Rt, addrmode_imm12_pre:$addr), IndexModePre, StFrm, iii, opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { bits<17> addr; @@ -2619,7 +2658,7 @@ def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb), - (ins GPR:$Rt, addrmode3:$addr), IndexModePre, + (ins GPR:$Rt, addrmode3_pre:$addr), IndexModePre, StMiscFrm, IIC_iStore_bh_ru, "strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { bits<14> addr; @@ -2651,7 +2690,7 @@ def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb), let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr), + (ins GPR:$Rt, GPR:$Rt2, addrmode3_pre:$addr), IndexModePre, StMiscFrm, IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!", "$addr.base = $Rn_wb", []> { @@ -4426,7 +4465,7 @@ multiclass LdStCop<bit load, bit Dbit, string asm> { let Inst{7-0} = addr{7-0}; let DecoderMethod = "DecodeCopMemInstruction"; } - def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr), asm, "\t$cop, $CRd, $addr!", IndexModePre> { bits<13> addr; bits<4> cop; @@ -4497,7 +4536,7 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm> { let Inst{7-0} = addr{7-0}; let DecoderMethod = "DecodeCopMemInstruction"; } - def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr), asm, "\t$cop, $CRd, $addr!", IndexModePre> { bits<13> addr; bits<4> cop; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td index 0411ac4..896fd0f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -4316,6 +4316,24 @@ def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vtst", "", NEONvtst, 1>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", + (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", + (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", + (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", + (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; + +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", + (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", + (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", + (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", + (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; + // Vector Bitwise Operations. 
def vnotd : PatFrag<(ops node:$in), @@ -4889,6 +4907,29 @@ def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs", "f32", v4f32, v4f32, fabs>; +def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))), + (v2i32 (bitconvert (v8i8 (add DPR:$src, + (NEONvshrs DPR:$src, (i32 7))))))), + (VABSv8i8 DPR:$src)>; +def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))), + (v2i32 (bitconvert (v4i16 (add DPR:$src, + (NEONvshrs DPR:$src, (i32 15))))))), + (VABSv4i16 DPR:$src)>; +def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))), + (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))), + (VABSv2i32 DPR:$src)>; +def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))), + (v4i32 (bitconvert (v16i8 (add QPR:$src, + (NEONvshrs QPR:$src, (i32 7))))))), + (VABSv16i8 QPR:$src)>; +def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))), + (v4i32 (bitconvert (v8i16 (add QPR:$src, + (NEONvshrs QPR:$src, (i32 15))))))), + (VABSv8i16 QPR:$src)>; +def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))), + (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))), + (VABSv4i32 QPR:$src)>; + def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>; def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td index c9d709e..4dacb86 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -150,7 +150,7 @@ def lo5AllOne : PatLeaf<(i32 imm), [{ def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";} def t2addrmode_imm12 : Operand<i32>, ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> { - let PrintMethod = "printAddrModeImm12Operand"; + let PrintMethod = "printAddrModeImm12Operand<false>"; let EncoderMethod = "getAddrModeImm12OpValue"; let DecoderMethod = "DecodeT2AddrModeImm12"; let ParserMatchClass = t2addrmode_imm12_asmoperand; @@ -3401,12 +3401,7 @@ class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary, bits<5> mode; bit M; - let Inst{31-27} = 0b11110; - let Inst{26} = 0; - let Inst{25-20} = 0b111010; - let Inst{19-16} = 0b1111; - let Inst{15-14} = 0b10; - let Inst{12} = 0; + let Inst{31-11} = 0b111100111010111110000; let Inst{10-9} = imod; let Inst{8} = M; let Inst{7-5} = iflags; @@ -3425,13 +3420,13 @@ let imod = 0, iflags = 0, M = 1 in // A6.3.4 Branches and miscellaneous control // Table A6-14 Change Processor State, and hint instructions -def t2HINT : T2I<(outs), (ins imm0_255:$imm), NoItinerary, "hint", "\t$imm",[]>{ - bits<8> imm; - let Inst{31-8} = 0b111100111010111110000000; - let Inst{7-0} = imm; +def t2HINT : T2I<(outs), (ins imm0_4:$imm), NoItinerary, "hint", "\t$imm",[]> { + bits<3> imm; + let Inst{31-3} = 0b11110011101011111000000000000; + let Inst{2-0} = imm; } -def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_255:$imm, pred:$p)>; +def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_4:$imm, pred:$p)>; def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>; def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>; def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>; @@ -3449,7 +3444,8 @@ def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> { // Secure Monitor Call is a system instruction. 
// Option = Inst{19-16} -def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", []> { +def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", + []>, Requires<[IsThumb2, HasTrustZone]> { let Inst{31-27} = 0b11110; let Inst{26-20} = 0b1111111; let Inst{15-12} = 0b1000; diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index b7ac5d5..c8ed576 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -87,53 +87,6 @@ namespace { MachineBasicBlock::iterator i) : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} }; - class UnitRegsMap { - public: - UnitRegsMap(const TargetRegisterInfo* _TRI) : TRI(_TRI) {} - const SmallVector<unsigned, 4>& operator[](unsigned Reg) { - DenseMap<unsigned, SmallVector<unsigned, 4> >::iterator found = - Cache.find(Reg); - if (found != Cache.end()) - return found->second; - else - return Cache.insert(std::make_pair(Reg, this->getUnitRegs(Reg))) - .first->second; - } - private: - SmallVector<unsigned, 4> getUnitRegs(unsigned Reg) { - SmallVector<unsigned, 4> Res; - - const TargetRegisterClass* TRC = TRI->getMinimalPhysRegClass(Reg); - if (TRC == &ARM::QPRRegClass) { - if (Reg > ARM::Q7) { - Res.push_back(TRI->getSubReg(Reg, ARM::dsub_0)); - Res.push_back(TRI->getSubReg(Reg, ARM::dsub_1)); - return Res; - } - - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0)); - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1)); - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_2)); - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_3)); - - return Res; - } - - if (TRC == &ARM::DPRRegClass && Reg < ARM::D15) { - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0)); - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1)); - - return Res; - } - - Res.push_back(Reg); - - return Res; - - } - const TargetRegisterInfo* TRI; - DenseMap<unsigned, SmallVector<unsigned, 4> > Cache; - }; typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; typedef MemOpQueue::iterator MemOpQueueIter; @@ -175,11 +128,6 @@ namespace { MachineBasicBlock::iterator MBBI, bool &Advance, MachineBasicBlock::iterator &I); - unsigned AddMemOp(MemOpQueue& MemOps, - const MemOpQueueEntry newEntry, - UnitRegsMap& UnitRegsInfo, - SmallSet<unsigned, 4>& UsedUnitRegs, - unsigned At = -1U); bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); bool MergeReturnIntoLDM(MachineBasicBlock &MBB); }; @@ -1265,103 +1213,12 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, return false; } -/// AddMemOp - helper for ARMLoadStoreOpt::LoadStoreMultipleOpti. -/// It adds store mem ops with simple push_back/insert method, -/// without any additional logic. -/// For load operation it does the next: -/// 1. Adds new load operation into MemOp collection at "At" position. -/// 2. Removes any "load" operations from MemOps, that changes "Reg" register -/// contents, prior to "At". -/// UnitRegsInfo - Map of type Map< Register, UnitRegisters-vector > -/// UsedUnitRegs - set of unit-registers currently in use. -/// At - position at which it would added, and prior which the clean-up -/// should be made (for load operation). -/// FIXME: The clean-up also should be made for store operations, -/// but the memory address should be analyzed instead of unit registers. 
-unsigned ARMLoadStoreOpt::AddMemOp(MemOpQueue& MemOps, - const MemOpQueueEntry NewEntry, - UnitRegsMap& UnitRegsInfo, - SmallSet<unsigned, 4>& UsedUnitRegs, - unsigned At) { - unsigned Cleaned = 0; - - if (At == -1U) { - At = MemOps.size(); - MemOps.push_back(NewEntry); - } else - MemOps.insert(&MemOps[At], NewEntry); - - // FIXME: - // If operation is not load, leave it as is by now, - // So 0 overridden ops would cleaned in this case. - if (!NewEntry.MBBI->mayLoad()) - return 0; - - const SmallVector<unsigned, 4>& NewEntryUnitRegs = UnitRegsInfo[NewEntry.Reg]; - - bool FoundOverriddenLoads = false; - - for (unsigned i = 0, e = NewEntryUnitRegs.size(); i != e; ++i) - if (UsedUnitRegs.count(NewEntryUnitRegs[i])) { - FoundOverriddenLoads = true; - break; - } - - // If we detect that this register is used by load operations that are - // predecessors for the new one, remove them from MemOps then. - if (FoundOverriddenLoads) { - MemOpQueue UpdatedMemOps; - - // Scan through MemOps entries. - for (unsigned i = 0; i != At; ++i) { - MemOpQueueEntry& MemOpEntry = MemOps[i]; - - // FIXME: Skip non-load operations by now. - if (!MemOpEntry.MBBI->mayLoad()) - continue; - - const SmallVector<unsigned, 4>& MemOpUnitRegs = - UnitRegsInfo[MemOpEntry.Reg]; - - // Lookup entry that loads contents into register used by new entry. - bool ReleaseThisEntry = false; - for (unsigned m = 0, em = MemOpUnitRegs.size(); m != em; ++m) { - if (std::find(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end(), - MemOpUnitRegs[m]) != NewEntryUnitRegs.end()) { - ReleaseThisEntry = true; - ++Cleaned; - break; - } - } - - if (ReleaseThisEntry) { - const SmallVector<unsigned, 4>& RelesedRegs = UnitRegsInfo[MemOpEntry.Reg]; - for (unsigned r = 0, er = RelesedRegs.size(); r != er; ++r) - UsedUnitRegs.erase(RelesedRegs[r]); - } else - UpdatedMemOps.push_back(MemOpEntry); - } - - // Keep anything without changes after At position. - for (unsigned i = At, e = MemOps.size(); i != e; ++i) - UpdatedMemOps.push_back(MemOps[i]); - - MemOps.swap(UpdatedMemOps); - } - - UsedUnitRegs.insert(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end()); - - return Cleaned; -} - /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR /// ops of the same base and incrementing offset into LDM / STM ops. bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { unsigned NumMerges = 0; unsigned NumMemOps = 0; MemOpQueue MemOps; - UnitRegsMap UnitRegsInfo(TRI); - SmallSet<unsigned, 4> UsedRegUnits; unsigned CurrBase = 0; int CurrOpc = -1; unsigned CurrSize = 0; @@ -1401,6 +1258,22 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // merge the ldr's so far, including this one. But don't try to // combine the following ldr(s). Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg()); + + // Watch out for: + // r4 := ldr [r0, #8] + // r4 := ldr [r0, #4] + // + // The optimization may reorder the second ldr in front of the first + // ldr, which violates the write-after-write (WAW) dependence. The same + // applies to str. Try to merge inst(s) already in MemOps. + bool Overlap = false; + for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) { + if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) { + Overlap = true; + break; + } + } + if (CurrBase == 0 && !Clobber) { // Start of a new chain.
CurrBase = Base; @@ -1408,13 +1281,10 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { CurrSize = Size; CurrPred = Pred; CurrPredReg = PredReg; - MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); ++NumMemOps; - const SmallVector<unsigned, 4>& EntryUnitRegs = UnitRegsInfo[Reg]; - UsedRegUnits.insert(EntryUnitRegs.begin(), EntryUnitRegs.end()); Advance = true; - } else { + } else if (!Overlap) { if (Clobber) { TryMerge = true; Advance = true; @@ -1424,24 +1294,20 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // No need to match PredReg. // Continue adding to the queue. if (Offset > MemOps.back().Offset) { - unsigned OverridesCleaned = - AddMemOp(MemOps, - MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI), - UnitRegsInfo, UsedRegUnits) != 0; - NumMemOps += 1 - OverridesCleaned; + MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, + Position, MBBI)); + ++NumMemOps; Advance = true; } else { - for (unsigned I = 0; I != NumMemOps; ++I) { - if (Offset < MemOps[I].Offset) { - MemOpQueueEntry entry(Offset, Reg, isKill, Position, MBBI); - unsigned OverridesCleaned = - AddMemOp(MemOps, entry, UnitRegsInfo, - UsedRegUnits, I) != 0; - NumMemOps += 1 - OverridesCleaned; - + for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); + I != E; ++I) { + if (Offset < I->Offset) { + MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill, + Position, MBBI)); + ++NumMemOps; Advance = true; break; - } else if (Offset == MemOps[I].Offset) { + } else if (Offset == I->Offset) { // Collision! This can't be merged! break; } @@ -1512,7 +1378,6 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { CurrPredReg = 0; if (NumMemOps) { MemOps.clear(); - UsedRegUnits.clear(); NumMemOps = 0; } diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index 88d96c0..f4248fc 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -38,7 +38,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// VarArgsRegSaveSize - Size of the register save area for vararg functions. /// - unsigned VarArgsRegSaveSize; + unsigned ArgRegsSaveSize; /// HasStackFrame - True if this function has a stack frame. Set by /// processFunctionBeforeCalleeSavedScan(). 
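A note on the ARMLoadStoreOptimizer hunks just above: with the UnitRegsMap/AddMemOp machinery removed, correctness rests on two simpler pieces, the regsOverlap scan that refuses to extend a chain once a queued op already writes the same register (the WAW guard), and a plain sorted insertion into MemOps. A reduced sketch of the insertion logic (MemOp and insertSortedByOffset are illustrative names, not the LLVM MemOpQueueEntry types):

    #include <iostream>
    #include <vector>

    // Entries stay sorted by ascending offset; an equal offset is a
    // collision that stops the merge, mirroring the "Collision! This can't
    // be merged!" branch in the hunk above.
    struct MemOp { int Offset; unsigned Reg; };

    static bool insertSortedByOffset(std::vector<MemOp> &Queue, MemOp New) {
      for (auto I = Queue.begin(), E = Queue.end(); I != E; ++I) {
        if (New.Offset < I->Offset) {
          Queue.insert(I, New);      // keep ascending offset order
          return true;
        }
        if (New.Offset == I->Offset)
          return false;              // same slot touched twice: cannot merge
      }
      Queue.push_back(New);          // largest offset so far goes at the end
      return true;
    }

    int main() {
      std::vector<MemOp> Q;
      insertSortedByOffset(Q, {8, 4});
      insertSortedByOffset(Q, {4, 5});
      bool Merged = insertSortedByOffset(Q, {8, 6}); // collides with offset 8
      std::cout << "entries=" << Q.size() << " merged=" << Merged << "\n";
    }

Keeping the queue sorted by offset is what lets the later merge step turn consecutive slots into a single LDM/STM.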
@@ -117,7 +117,7 @@ public: ARMFunctionInfo() : isThumb(false), hasThumb2(false), - VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), + ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), @@ -129,7 +129,7 @@ public: explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()), - VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), + ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), @@ -141,8 +141,8 @@ public: bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } bool isThumb2Function() const { return isThumb && hasThumb2; } - unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; } - void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; } + unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; } + void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; } bool hasStackFrame() const { return HasStackFrame; } void setHasStackFrame(bool s) { HasStackFrame = s; } diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp index 739300e..8653c46 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -91,6 +91,7 @@ void ARMSubtarget::initializeEnvironment() { HasRAS = false; HasMPExtension = false; FPOnlySP = false; + HasTrustZone = false; AllowsUnalignedMem = false; Thumb2DSP = false; UseNaClTrap = false; diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h index 5b5ee6a..038eb76 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h @@ -148,6 +148,9 @@ protected: /// precision. bool FPOnlySP; + /// HasTrustZone - if true, processor supports TrustZone security extensions + bool HasTrustZone; + /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory /// accesses for some types. For details, see /// ARMTargetLowering::allowsUnalignedMemoryAccesses(). 
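The TrustZone plumbing above, together with the Requires<[HasTrustZone]> predicates added to SMC/t2SMC earlier in this section, is the usual three-layer subtarget-feature pattern: a default-false bit set in initializeEnvironment(), a C++ accessor, and a TableGen predicate gating the instruction. A minimal reduction under assumed names (SubtargetModel and canAssembleSMC are illustrative stand-ins, not LLVM classes):

    #include <iostream>

    // The flag defaults to false and is flipped when the "trustzone"
    // feature is applied; predicates then query it before accepting SMC.
    class SubtargetModel {
      bool HasTrustZone = false; // matches the initializeEnvironment() default
    public:
      void applyTrustZoneFeature() { HasTrustZone = true; }
      bool hasTrustZone() const { return HasTrustZone; }
    };

    // Stand-in for the Requires<[IsARM, HasTrustZone]> check on SMC: without
    // the feature the instruction is rejected rather than silently encoded.
    static bool canAssembleSMC(const SubtargetModel &ST) {
      return ST.hasTrustZone();
    }

    int main() {
      SubtargetModel ST;
      std::cout << canAssembleSMC(ST);         // 0: smc rejected
      ST.applyTrustZoneFeature();
      std::cout << canAssembleSMC(ST) << "\n"; // 1: smc accepted
    }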
@@ -251,6 +254,7 @@ public: bool hasVMLxForwarding() const { return HasVMLxForwarding; } bool isFPBrccSlow() const { return SlowFPBrcc; } bool isFPOnlySP() const { return FPOnlySP; } + bool hasTrustZone() const { return HasTrustZone; } bool prefers32BitThumb() const { return Pref32BitThumb; } bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 1019b97..53ece66 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -125,6 +125,10 @@ public: unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const; unsigned getAddressComputationCost(Type *Val) const; + + unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind Op1Info = OK_AnyValue, + OperandValueKind Op2Info = OK_AnyValue) const; /// @} }; @@ -223,9 +227,9 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, - // Operations that we legalize using load/stores to the stack. - { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 }, - { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 }, + // Operations that we legalize using splitting. + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 }, // Vector float <-> i32 conversions. { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, @@ -456,3 +460,67 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, return LT.first * NEONShuffleTbl[Idx].Cost; } + +unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Op1Info, + OperandValueKind Op2Info) const { + + int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + + const unsigned FunctionCallDivCost = 20; + const unsigned ReciprocalDivCost = 10; + static const CostTblEntry<MVT> CostTbl[] = { + // Division. + // These costs are somewhat random. Choose a cost of 20 to indicate that + // vectorizing division (added function call) is going to be very expensive. + // Double register types. + { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost}, + { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost}, + { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost}, + { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost}, + { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v4i16, ReciprocalDivCost}, + { ISD::UDIV, MVT::v4i16, ReciprocalDivCost}, + { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost}, + { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v8i8, ReciprocalDivCost}, + { ISD::UDIV, MVT::v8i8, ReciprocalDivCost}, + { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost}, + { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost}, + // Quad register types.
+ { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost}, + { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost}, + { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost}, + { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost}, + { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost}, + { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost}, + { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost}, + { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost}, + { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost}, + // Multiplication. + }; + + int Idx = -1; + + if (ST->hasNEON()) + Idx = CostTableLookup<MVT>(CostTbl, array_lengthof(CostTbl), ISDOpcode, + LT.second); + + if (Idx != -1) + return LT.first * CostTbl[Idx].Cost; + + + return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, + Op2Info); +} + diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index ed7b7ec..1dd2953 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -86,11 +86,11 @@ class ARMAsmParser : public MCTargetAsmParser { MCAsmLexer &getLexer() const { return Parser.getLexer(); } bool Warning(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + ArrayRef<SMRange> Ranges = None) { return Parser.Warning(L, Msg, Ranges); } bool Error(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + ArrayRef<SMRange> Ranges = None) { return Parser.Error(L, Msg, Ranges); } @@ -610,6 +610,13 @@ public: int64_t Value = CE->getValue(); return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020; } + bool isImm0_4() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 5; + } bool isImm0_1020s4() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -4745,6 +4752,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vnmls" || Mnemonic == "vacge" || Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" || + Mnemonic == "vaclt" || Mnemonic == "vacle" || Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" || @@ -5014,8 +5022,8 @@ static bool isDataTypeToken(StringRef Tok) { static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) { return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm"); } - -static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features); +static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features, + unsigned VariantID); /// Parse an arm instruction mnemonic followed by its operands. 
bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, @@ -5026,7 +5034,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // MatchInstructionImpl(), but that's too late for aliases that include // any sort of suffix. unsigned AvailableFeatures = getAvailableFeatures(); - applyMnemonicAliases(Name, AvailableFeatures); + unsigned AssemblerDialect = getParser().getAssemblerDialect(); + applyMnemonicAliases(Name, AvailableFeatures, AssemblerDialect); // First check for the ARM-specific .req directive. if (Parser.getTok().is(AsmToken::Identifier) && @@ -7613,6 +7622,11 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(IDLoc, "instruction variant requires ARMv6 or later"); case Match_RequiresThumb2: return Error(IDLoc, "instruction variant requires Thumb2"); + case Match_ImmRange0_4: { + SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + return Error(ErrorLoc, "immediate operand must be in the range [0,4]"); + } case Match_ImmRange0_15: { SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 2e009e5..ac937f3 100644 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -308,6 +308,8 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder); static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, @@ -1951,10 +1953,12 @@ static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, Inst.addOperand(MCOperand::CreateImm(mode)); if (iflags) S = MCDisassembler::SoftFail; } else { - // imod == '00' && M == '0' --> UNPREDICTABLE - Inst.setOpcode(ARM::t2CPS1p); - Inst.addOperand(MCOperand::CreateImm(mode)); - S = MCDisassembler::SoftFail; + // imod == '00' && M == '0' --> this is a HINT instruction + int imm = fieldFromInstruction(Insn, 0, 8); + // HINT is defined only for immediates in [0..4] + if (imm > 4) return MCDisassembler::Fail; + Inst.setOpcode(ARM::t2HINT); + Inst.addOperand(MCOperand::CreateImm(imm)); } return S; @@ -1996,9 +2000,10 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn, imm |= (fieldFromInstruction(Insn, 16, 4) << 12); if (Inst.getOpcode() == ARM::MOVTi16) - if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; - if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) + + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder)) @@ -3570,7 +3575,7 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, unsigned Rn = fieldFromInstruction(Insn, 16, 4); unsigned pred = fieldFromInstruction(Insn, 28, 4); - if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; if ((Rt & 1) || Rt == 0xE
|| Rn == 0xF) return MCDisassembler::Fail; @@ -4496,6 +4501,15 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, return S; } +static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) +{ + unsigned Imm = fieldFromInstruction(Insn, 0, 3); + if (Imm > 4) return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(Imm)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 2afb20d..3bcd083 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -490,7 +490,8 @@ void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op, } void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, - raw_ostream &O) { + raw_ostream &O, + bool AlwaysPrintImm0) { const MCOperand &MO1 = MI->getOperand(Op); const MCOperand &MO2 = MI->getOperand(Op+1); const MCOperand &MO3 = MI->getOperand(Op+2); @@ -509,7 +510,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()); ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm()); - if (ImmOffs || (op == ARM_AM::sub)) { + if (AlwaysPrintImm0 || ImmOffs || (op == ARM_AM::sub)) { O << ", " << markup("<imm:") << "#" @@ -520,6 +521,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, O << ']' << markup(">"); } +template <bool AlwaysPrintImm0> void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); @@ -535,7 +537,7 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, printAM3PostIndexOp(MI, Op, O); return; } - printAM3PreOrOffsetIndexOp(MI, Op, O); + printAM3PreOrOffsetIndexOp(MI, Op, O, AlwaysPrintImm0); } void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, @@ -593,6 +595,7 @@ void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum, O << ARM_AM::getAMSubModeStr(Mode); } +template <bool AlwaysPrintImm0> void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); @@ -608,7 +611,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm()); unsigned Op = ARM_AM::getAM5Op(MO2.getImm()); - if (ImmOffs || Op == ARM_AM::sub) { + if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) { O << ", " << markup("<imm:") << "#" @@ -1022,6 +1025,7 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum, ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup); } +template <bool AlwaysPrintImm0> void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); @@ -1042,13 +1046,13 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, OffImm = 0; if (isSub) { O << ", " - << markup("<imm:") + << markup("<imm:") << "#-" << -OffImm << markup(">"); } - else if (OffImm > 0) { + else if (AlwaysPrintImm0 || OffImm > 0) { O << ", " - << markup("<imm:") + << markup("<imm:") << "#" << OffImm << markup(">"); } diff --git 
a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index edff75d..344104e 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -47,12 +47,13 @@ public: raw_ostream &O); void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - + template <bool AlwaysPrintImm0> void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAM3PostIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O); - void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,raw_ostream &O); + void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O, + bool AlwaysPrintImm0); void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); @@ -60,6 +61,7 @@ public: raw_ostream &O); void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template <bool AlwaysPrintImm0> void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); @@ -91,6 +93,7 @@ public: raw_ostream &O); void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template<bool AlwaysPrintImm0> void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum, diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 418971d..6c3d247 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -13,7 +13,9 @@ // //===----------------------------------------------------------------------===// +#include "ARMRegisterInfo.h" #include "ARMUnwindOp.h" +#include "ARMUnwindOpAsm.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmBackend.h" @@ -26,6 +28,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" @@ -33,11 +36,15 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +static std::string GetAEABIUnwindPersonalityName(unsigned Index) { + assert(Index < NUM_PERSONALITY_INDEX && "Invalid personality index"); + return (Twine("__aeabi_unwind_cpp_pr") + Twine(Index)).str(); +} + namespace { /// Extend the generic ELFStreamer class so that it can emit mapping symbols at @@ -57,8 +64,9 @@ public: ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, bool IsThumb) : MCELFStreamer(SK_ARMELFStreamer, Context, TAB, OS, Emitter), - IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None), ExTab(0), - FnStart(0), Personality(0), CantUnwind(false) {} + IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) { + Reset(); + } ~ARMELFStreamer() {} @@ -75,14 +83,15 @@ public: virtual void EmitRegSave(const SmallVectorImpl<unsigned> 
&RegList, bool isVector); - virtual void ChangeSection(const MCSection *Section) { + virtual void ChangeSection(const MCSection *Section, + const MCExpr *Subsection) { // We have to keep track of the mapping symbol state of any sections we // use. Each one should start off as EMS_None, which is provided as the // default constructor by DenseMap::lookup. - LastMappingSymbols[getPreviousSection()] = LastEMS; + LastMappingSymbols[getPreviousSection().first] = LastEMS; LastEMS = LastMappingSymbols.lookup(Section); - MCELFStreamer::ChangeSection(Section); + MCELFStreamer::ChangeSection(Section, Subsection); } /// This function is the one used to emit instruction data into the ELF @@ -175,7 +184,7 @@ private: MCELF::SetType(SD, ELF::STT_NOTYPE); MCELF::SetBinding(SD, ELF::STB_LOCAL); SD.setExternal(false); - Symbol->setSection(*getCurrentSection()); + Symbol->setSection(*getCurrentSection().first); const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); Symbol->setVariableValue(Value); @@ -194,6 +203,7 @@ private: void Reset(); void EmitPersonalityFixup(StringRef Name); + void CollectUnwindOpcodes(); void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags, SectionKind Kind, const MCSymbol &Fn); @@ -210,9 +220,16 @@ private: MCSymbol *ExTab; MCSymbol *FnStart; const MCSymbol *Personality; + uint32_t VFPRegSave; // Register mask for {d31-d0} + uint32_t RegSave; // Register mask for {r15-r0} + int64_t SPOffset; + uint16_t FPReg; + int64_t FPOffset; + bool UsedFP; bool CantUnwind; + UnwindOpcodeAssembler UnwindOpAsm; }; -} +} // end anonymous namespace inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, unsigned Type, @@ -238,7 +255,7 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, } else { EHSection = getContext().getELFSection(EHSecName, Type, Flags, Kind); } - assert(EHSection); + assert(EHSection && "Failed to get the required EH section"); // Switch to .ARM.extab or .ARM.exidx section SwitchSection(EHSection); @@ -262,10 +279,20 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) { } void ARMELFStreamer::Reset() { + const MCRegisterInfo &MRI = getContext().getRegisterInfo(); + ExTab = NULL; FnStart = NULL; Personality = NULL; + VFPRegSave = 0; + RegSave = 0; + FPReg = MRI.getEncodingValue(ARM::SP); + FPOffset = 0; + SPOffset = 0; + UsedFP = false; CantUnwind = false; + + UnwindOpAsm.Reset(); } // Add the R_ARM_NONE fixup at the same position @@ -284,6 +311,18 @@ void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) { MCFixup::getKindForSize(4, false))); } +void ARMELFStreamer::CollectUnwindOpcodes() { + if (UsedFP) { + UnwindOpAsm.EmitSetFP(FPReg); + UnwindOpAsm.EmitSPOffset(-FPOffset); + } else { + UnwindOpAsm.EmitSPOffset(SPOffset); + } + UnwindOpAsm.EmitVFPRegSave(VFPRegSave); + UnwindOpAsm.EmitRegSave(RegSave); + UnwindOpAsm.Finalize(); +} + void ARMELFStreamer::EmitFnStart() { assert(FnStart == 0); FnStart = getContext().CreateTempSymbol(); @@ -294,35 +333,29 @@ void ARMELFStreamer::EmitFnEnd() { assert(FnStart && ".fnstart must preceeds .fnend"); // Emit unwind opcodes if there is no .handlerdata directive - int PersonalityIndex = -1; if (!ExTab && !CantUnwind) { - // For __aeabi_unwind_cpp_pr1, we have to emit opcodes in .ARM.extab. 
- SwitchToExTabSection(*FnStart); - - // Create .ARM.extab label for offset in .ARM.exidx - ExTab = getContext().CreateTempSymbol(); - EmitLabel(ExTab); - - PersonalityIndex = 1; - - uint32_t Entry = 0; - uint32_t NumExtraEntryWords = 0; - Entry |= NumExtraEntryWords << 24; - Entry |= (EHT_COMPACT | PersonalityIndex) << 16; - - // TODO: This should be generated according to .save, .vsave, .setfp - // directives. Currently, we are simply generating FINISH opcode. - Entry |= UNWIND_OPCODE_FINISH << 8; - Entry |= UNWIND_OPCODE_FINISH; - - EmitIntValue(Entry, 4, 0); + CollectUnwindOpcodes(); + + unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex(); + if (PersonalityIndex == AEABI_UNWIND_CPP_PR1 || + PersonalityIndex == AEABI_UNWIND_CPP_PR2) { + // For the __aeabi_unwind_cpp_pr1 and __aeabi_unwind_cpp_pr2, we have to + // emit the unwind opcodes in the corresponding ".ARM.extab" section, and + // then emit a reference to these unwind opcodes in the second word of + // the exception index table entry. + SwitchToExTabSection(*FnStart); + ExTab = getContext().CreateTempSymbol(); + EmitLabel(ExTab); + EmitBytes(UnwindOpAsm.data(), 0); + } } // Emit the exception index table entry SwitchToExIdxSection(*FnStart); - if (PersonalityIndex == 1) - EmitPersonalityFixup("__aeabi_unwind_cpp_pr1"); + unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex(); + if (PersonalityIndex < NUM_PERSONALITY_INDEX) + EmitPersonalityFixup(GetAEABIUnwindPersonalityName(PersonalityIndex)); const MCSymbolRefExpr *FnStartRef = MCSymbolRefExpr::Create(FnStart, @@ -333,12 +366,22 @@ void ARMELFStreamer::EmitFnEnd() { if (CantUnwind) { EmitIntValue(EXIDX_CANTUNWIND, 4, 0); - } else { + } else if (ExTab) { + // Emit a reference to the unwind opcodes in the ".ARM.extab" section. const MCSymbolRefExpr *ExTabEntryRef = MCSymbolRefExpr::Create(ExTab, MCSymbolRefExpr::VK_ARM_PREL31, getContext()); EmitValue(ExTabEntryRef, 4, 0); + } else { + // For the __aeabi_unwind_cpp_pr0, we have to emit the unwind opcodes in + // the second word of exception index table entry. The size of the unwind + // opcodes should always be 4 bytes. + assert(PersonalityIndex == AEABI_UNWIND_CPP_PR0 && + "Compact model must use __aeabi_unwind_cpp_pr0 as personality"); + assert(UnwindOpAsm.size() == 4u && + "Unwind opcode size for __aeabi_unwind_cpp_pr0 must be equal to 4"); + EmitBytes(UnwindOpAsm.data(), 0); } // Clean exception handling frame information @@ -368,36 +411,54 @@ void ARMELFStreamer::EmitHandlerData() { EmitValue(PersonalityRef, 4, 0); // Emit unwind opcodes - uint32_t Entry = 0; - uint32_t NumExtraEntryWords = 0; - - // TODO: This should be generated according to .save, .vsave, .setfp - // directives. Currently, we are simply generating FINISH opcode.
- Entry |= NumExtraEntryWords << 24; - Entry |= UNWIND_OPCODE_FINISH << 16; - Entry |= UNWIND_OPCODE_FINISH << 8; - Entry |= UNWIND_OPCODE_FINISH; - - EmitIntValue(Entry, 4, 0); + CollectUnwindOpcodes(); + EmitBytes(UnwindOpAsm.data(), 0); } void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) { Personality = Per; + UnwindOpAsm.setPersonality(Per); } -void ARMELFStreamer::EmitSetFP(unsigned NewFpReg, - unsigned NewSpReg, +void ARMELFStreamer::EmitSetFP(unsigned NewFPReg, + unsigned NewSPReg, int64_t Offset) { - // TODO: Not implemented + assert(SPOffset == 0 && + "Current implementation assumes .setfp precedes .pad"); + + const MCRegisterInfo &MRI = getContext().getRegisterInfo(); + + uint16_t NewFPRegEncVal = MRI.getEncodingValue(NewFPReg); +#ifndef NDEBUG + uint16_t NewSPRegEncVal = MRI.getEncodingValue(NewSPReg); +#endif + + assert((NewSPReg == ARM::SP || NewSPRegEncVal == FPReg) && + "the operand of .setfp directive should be either $sp or $fp"); + + UsedFP = true; + FPReg = NewFPRegEncVal; + FPOffset = Offset; } void ARMELFStreamer::EmitPad(int64_t Offset) { - // TODO: Not implemented + SPOffset += Offset; } void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool IsVector) { - // TODO: Not implemented + const MCRegisterInfo &MRI = getContext().getRegisterInfo(); + +#ifndef NDEBUG + unsigned Max = IsVector ? 32 : 16; +#endif + uint32_t &RegMask = IsVector ? VFPRegSave : RegSave; + + for (size_t i = 0; i < RegList.size(); ++i) { + unsigned Reg = MRI.getEncodingValue(RegList[i]); + assert(Reg < Max && "Register encoded value out of range"); + RegMask |= 1u << Reg; + } } namespace llvm { diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h index dad5576..fa4add6 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h @@ -107,6 +107,19 @@ namespace llvm { UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D8 = 0xd0 }; + /// ARM-defined Personality Routine Index + enum ARMPersonalityRoutineIndex { + // To make the exception handling table more compact, ARM defined + // several personality routines in EHABI. There are 3 different + // personality routines in ARM EHABI currently. It is possible to have 16 + // pre-defined personality routines at most. + AEABI_UNWIND_CPP_PR0 = 0, + AEABI_UNWIND_CPP_PR1 = 1, + AEABI_UNWIND_CPP_PR2 = 2, + + NUM_PERSONALITY_INDEX + }; + } #endif // ARM_UNWIND_OP_H diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp new file mode 100644 index 0000000..191db69 --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp @@ -0,0 +1,198 @@ +//===-- ARMUnwindOpAsm.cpp - ARM Unwind Opcodes Assembler -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the unwind opcode assembler for ARM exception handling +// table.
+// +//===----------------------------------------------------------------------===// + +#include "ARMUnwindOpAsm.h" + +#include "ARMUnwindOp.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" + +using namespace llvm; + +void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) { + if (RegSave == 0u) + return; + + // One-byte opcode to save registers r14 and r11-r4 + if (RegSave & (1u << 4)) { + // The one-byte opcode will always save r4, thus we can't use the one-byte + // opcode when r4 is not in the .save directive. + + // Compute the consecutive registers from r4 to r11. + uint32_t Range = 0; + uint32_t Mask = (1u << 4); + for (uint32_t Bit = (1u << 5); Bit < (1u << 12); Bit <<= 1) { + if ((RegSave & Bit) == 0u) + break; + ++Range; + Mask |= Bit; + } + + // Emit this opcode only when the mask covers all of the registers. + uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask); + if (UnmaskedReg == 0u) { + // Pop r[4 : (4 + n)] + Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4 | Range); + RegSave &= 0x000fu; + } else if (UnmaskedReg == (1u << 14)) { + // Pop r[14] + r[4 : (4 + n)] + Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | Range); + RegSave &= 0x000fu; + } + } + + // Two-byte opcode to save registers r15-r4 + if ((RegSave & 0xfff0u) != 0) { + uint32_t Op = UNWIND_OPCODE_POP_REG_MASK_R4 | (RegSave >> 4); + Ops.push_back(static_cast<uint8_t>(Op >> 8)); + Ops.push_back(static_cast<uint8_t>(Op & 0xff)); + } + + // Opcode to save registers r3-r0 + if ((RegSave & 0x000fu) != 0) { + uint32_t Op = UNWIND_OPCODE_POP_REG_MASK | (RegSave & 0x000fu); + Ops.push_back(static_cast<uint8_t>(Op >> 8)); + Ops.push_back(static_cast<uint8_t>(Op & 0xff)); + } +} + +/// Emit unwind opcodes for .vsave directives +void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) { + size_t i = 32; + + while (i > 16) { + uint32_t Bit = 1u << (i - 1); + if ((VFPRegSave & Bit) == 0u) { + --i; + continue; + } + + uint32_t Range = 0; + + --i; + Bit >>= 1; + + while (i > 16 && (VFPRegSave & Bit)) { + --i; + ++Range; + Bit >>= 1; + } + + uint32_t Op = + UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 | ((i - 16) << 4) | Range; + Ops.push_back(static_cast<uint8_t>(Op >> 8)); + Ops.push_back(static_cast<uint8_t>(Op & 0xff)); + } + + while (i > 0) { + uint32_t Bit = 1u << (i - 1); + if ((VFPRegSave & Bit) == 0u) { + --i; + continue; + } + + uint32_t Range = 0; + + --i; + Bit >>= 1; + + while (i > 0 && (VFPRegSave & Bit)) { + --i; + ++Range; + Bit >>= 1; + } + + uint32_t Op = UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) | Range; + Ops.push_back(static_cast<uint8_t>(Op >> 8)); + Ops.push_back(static_cast<uint8_t>(Op & 0xff)); + } +} + +/// Emit unwind opcodes for .setfp directives +void UnwindOpcodeAssembler::EmitSetFP(uint16_t FPReg) { + Ops.push_back(UNWIND_OPCODE_SET_VSP | FPReg); +} + +/// Emit unwind opcodes to update stack pointer +void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) { + if (Offset > 0x200) { + uint8_t Buff[10]; + size_t Size = encodeULEB128((Offset - 0x204) >> 2, Buff); + Ops.push_back(UNWIND_OPCODE_INC_VSP_ULEB128); + Ops.append(Buff, Buff + Size); + } else if (Offset > 0) { + if (Offset > 0x100) { + Ops.push_back(UNWIND_OPCODE_INC_VSP | 0x3fu); + Offset -= 0x100; + } + Ops.push_back(UNWIND_OPCODE_INC_VSP | + static_cast<uint8_t>((Offset - 4) >> 2)); + } else if (Offset < 0) { + while (Offset < -0x100) { + Ops.push_back(UNWIND_OPCODE_DEC_VSP | 0x3fu); + Offset += 0x100; + } + Ops.push_back(UNWIND_OPCODE_DEC_VSP | + static_cast<uint8_t>(((-Offset) - 4) >> 2)); + } +} + +void
UnwindOpcodeAssembler::AddOpcodeSizePrefix(size_t Pos) { + size_t SizeInWords = (size() + 3) / 4; + assert(SizeInWords <= 0x100u && + "Only 256 additional words are allowed for unwind opcodes"); + Ops[Pos] = static_cast<uint8_t>(SizeInWords - 1); +} + +void UnwindOpcodeAssembler::AddPersonalityIndexPrefix(size_t Pos, unsigned PI) { + assert(PI < NUM_PERSONALITY_INDEX && "Invalid personality prefix"); + Ops[Pos] = EHT_COMPACT | PI; +} + +void UnwindOpcodeAssembler::EmitFinishOpcodes() { + for (size_t i = (0x4u - (size() & 0x3u)) & 0x3u; i > 0; --i) + Ops.push_back(UNWIND_OPCODE_FINISH); +} + +void UnwindOpcodeAssembler::Finalize() { + if (HasPersonality) { + // Personality specified by .personality directive + Offset = 1; + AddOpcodeSizePrefix(1); + } else { + if (getOpcodeSize() <= 3) { + // __aeabi_unwind_cpp_pr0: [ 0x80 , OP1 , OP2 , OP3 ] + Offset = 1; + PersonalityIndex = AEABI_UNWIND_CPP_PR0; + AddPersonalityIndexPrefix(Offset, PersonalityIndex); + } else { + // __aeabi_unwind_cpp_pr1: [ 0x81 , SIZE , OP1 , OP2 , ... ] + Offset = 0; + PersonalityIndex = AEABI_UNWIND_CPP_PR1; + AddPersonalityIndexPrefix(Offset, PersonalityIndex); + AddOpcodeSizePrefix(1); + } + } + + // Emit the padding finish opcodes if the size() is not a multiple of 4. + EmitFinishOpcodes(); + + // Swap the byte order + uint8_t *Ptr = Ops.begin() + Offset; + assert(size() % 4 == 0 && "Final unwind opcodes should align to 4"); + for (size_t i = 0, n = size(); i < n; i += 4) { + std::swap(Ptr[i], Ptr[i + 3]); + std::swap(Ptr[i + 1], Ptr[i + 2]); + } +} diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h new file mode 100644 index 0000000..f6ecaeb --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h @@ -0,0 +1,114 @@ +//===-- ARMUnwindOpAsm.h - ARM Unwind Opcodes Assembler ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the unwind opcode assembler for ARM exception handling +// table. +// +//===----------------------------------------------------------------------===// + +#ifndef ARM_UNWIND_OP_ASM_H +#define ARM_UNWIND_OP_ASM_H + +#include "ARMUnwindOp.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class MCSymbol; + +class UnwindOpcodeAssembler { +private: + llvm::SmallVector<uint8_t, 8> Ops; + + unsigned Offset; + unsigned PersonalityIndex; + bool HasPersonality; + + enum { + // The number of bytes to be preserved for the size and personality index + // prefix of unwind opcodes. + NUM_PRESERVED_PREFIX_BUF = 2 + }; + +public: + UnwindOpcodeAssembler() + : Ops(NUM_PRESERVED_PREFIX_BUF), Offset(NUM_PRESERVED_PREFIX_BUF), + PersonalityIndex(NUM_PERSONALITY_INDEX), HasPersonality(0) { + } + + /// Reset the unwind opcode assembler.
+ void Reset() { + Ops.resize(NUM_PRESERVED_PREFIX_BUF); + Offset = NUM_PRESERVED_PREFIX_BUF; + PersonalityIndex = NUM_PERSONALITY_INDEX; + HasPersonality = 0; + } + + /// Get the size of the payload (including the size byte) + size_t size() const { + return Ops.size() - Offset; + } + + /// Get the beginning of the payload + const uint8_t *begin() const { + return Ops.begin() + Offset; + } + + /// Get the payload + StringRef data() const { + return StringRef(reinterpret_cast<const char *>(begin()), size()); + } + + /// Set the personality index + void setPersonality(const MCSymbol *Per) { + HasPersonality = 1; + } + + /// Get the personality index + unsigned getPersonalityIndex() const { + return PersonalityIndex; + } + + /// Emit unwind opcodes for .save directives + void EmitRegSave(uint32_t RegSave); + + /// Emit unwind opcodes for .vsave directives + void EmitVFPRegSave(uint32_t VFPRegSave); + + /// Emit unwind opcodes for .setfp directives + void EmitSetFP(uint16_t FPReg); + + /// Emit unwind opcodes to update stack pointer + void EmitSPOffset(int64_t Offset); + + /// Finalize the unwind opcode sequence for EmitBytes() + void Finalize(); + +private: + /// Get the size of the opcodes in bytes. + size_t getOpcodeSize() const { + return Ops.size() - NUM_PRESERVED_PREFIX_BUF; + } + + /// Add the length prefix to the payload + void AddOpcodeSizePrefix(size_t Pos); + + /// Add personality index prefix in some compact format + void AddPersonalityIndexPrefix(size_t Pos, unsigned PersonalityIndex); + + /// Fill the words with finish opcode if it is not aligned + void EmitFinishOpcodes(); +}; + +} // namespace llvm + +#endif // ARM_UNWIND_OP_ASM_H diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index 2c3388c..1e2a8b0 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -88,7 +88,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -104,8 +104,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; - if (VARegSaveSize) - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize, + if (ArgRegsSaveSize) + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize, MachineInstr::FrameSetup); if (!AFI->hasStackFrame()) { @@ -249,7 +249,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); int NumBytes = (int)MFI->getStackSize(); const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); unsigned FramePtr = RegInfo->getFrameRegister(MF); @@ -300,7 +300,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, } } - if (VARegSaveSize) { + if (ArgRegsSaveSize) { // Unlike T2 and ARM mode, the T1 pop instruction cannot restore // to LR, and we can't pop the value directly to the PC since // we need to update the SP after popping the value. 
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 2c3388c..1e2a8b0 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -88,7 +88,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
   const Thumb1InstrInfo &TII =
     *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
 
-  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
   unsigned NumBytes = MFI->getStackSize();
   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
   DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -104,8 +104,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
   unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
   int FramePtrSpillFI = 0;
 
-  if (VARegSaveSize)
-    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize,
+  if (ArgRegsSaveSize)
+    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
                  MachineInstr::FrameSetup);
 
   if (!AFI->hasStackFrame()) {
@@ -249,7 +249,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
   const Thumb1InstrInfo &TII =
     *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
 
-  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
   int NumBytes = (int)MFI->getStackSize();
   const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
   unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -300,7 +300,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
     }
   }
 
-  if (VARegSaveSize) {
+  if (ArgRegsSaveSize) {
     // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
     // to LR, and we can't pop the value directly to the PC since
     // we need to update the SP after popping the value. Therefore, we
@@ -313,7 +313,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
     AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
       .addReg(ARM::R3, RegState::Define);
 
-    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);
+    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
 
     MachineInstrBuilder MIB =
       BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
@@ -376,7 +376,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
 
-  bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
   DebugLoc DL = MI->getDebugLoc();
   MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
   AddDefaultPred(MIB);
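
Spelled out, the vararg epilogue built by the hunks above is a three-step sequence. The restatement below is a sketch: the first two calls appear verbatim in the diff, while the operands of tBX_RET_vararg are an assumption, since the hunk is truncated immediately after its BuildMI.

// pop {r3} -- the T1 pop cannot write LR, and popping straight to PC
// would leave no chance to fix up SP, so the return address goes into
// a scratch register first.
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
    .addReg(ARM::R3, RegState::Define);
// add sp, #ArgRegsSaveSize -- release the incoming-argument save area.
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
// bx r3 -- return through the scratch register (operands assumed).
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
                   .addReg(ARM::R3, RegState::Kill));
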
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 67e8ec7..a1b48c2 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -19,6 +19,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Support/CommandLine.h"
 
@@ -126,25 +127,41 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned SrcReg, bool isKill, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
+  DebugLoc DL;
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+                            MachineMemOperand::MOStore,
+                            MFI.getObjectSize(FI),
+                            MFI.getObjectAlignment(FI));
+
   if (RC == &ARM::GPRRegClass   || RC == &ARM::tGPRRegClass ||
       RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
       RC == &ARM::GPRnopcRegClass) {
-    DebugLoc DL;
-    if (I != MBB.end()) DL = I->getDebugLoc();
-
-    MachineFunction &MF = *MBB.getParent();
-    MachineFrameInfo &MFI = *MF.getFrameInfo();
-    MachineMemOperand *MMO =
-      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
-                              MachineMemOperand::MOStore,
-                              MFI.getObjectSize(FI),
-                              MFI.getObjectAlignment(FI));
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
                    .addReg(SrcReg, getKillRegState(isKill))
                    .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
     return;
   }
 
+  if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+    // Thumb2 STRD expects its dest-registers to be in rGPR. Not a problem for
+    // gsub_0, but needs an extra constraint for gsub_1 (which could be sp
+    // otherwise).
+    MachineRegisterInfo *MRI = &MF.getRegInfo();
+    MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+
+    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
+    AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+    AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+    MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+    AddDefaultPred(MIB);
+    return;
+  }
+
   ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI);
 }
 
@@ -153,24 +170,42 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      unsigned DestReg, int FI,
                      const TargetRegisterClass *RC,
                      const TargetRegisterInfo *TRI) const {
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+                            MachineMemOperand::MOLoad,
+                            MFI.getObjectSize(FI),
+                            MFI.getObjectAlignment(FI));
+  DebugLoc DL;
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
   if (RC == &ARM::GPRRegClass   || RC == &ARM::tGPRRegClass ||
       RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
       RC == &ARM::GPRnopcRegClass) {
-    DebugLoc DL;
-    if (I != MBB.end()) DL = I->getDebugLoc();
-
-    MachineFunction &MF = *MBB.getParent();
-    MachineFrameInfo &MFI = *MF.getFrameInfo();
-    MachineMemOperand *MMO =
-      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
-                              MachineMemOperand::MOLoad,
-                              MFI.getObjectSize(FI),
-                              MFI.getObjectAlignment(FI));
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
                    .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
     return;
   }
 
+  if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+    // Thumb2 LDRD expects its dest-registers to be in rGPR. Not a problem for
+    // gsub_0, but needs an extra constraint for gsub_1 (which could be sp
+    // otherwise).
+    MachineRegisterInfo *MRI = &MF.getRegInfo();
+    MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+
+    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
+    AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+    AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+    MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+    AddDefaultPred(MIB);
+
+    if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+      MIB.addReg(DestReg, RegState::ImplicitDefine);
+    return;
+  }
+
   ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
 }
 
@@ -514,6 +549,15 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
         Offset = -Offset;
         isSub = true;
       }
+    } else if (AddrMode == ARMII::AddrModeT2_i8s4) {
+      Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4;
+      NumBits = 8;
+      // MCInst operand has already scaled value.
+      Scale = 1;
+      if (Offset < 0) {
+        isSub = true;
+        Offset = -Offset;
+      }
     } else {
       llvm_unreachable("Unsupported addressing mode!");
     }
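
For readers skimming the pair-spill hunks above: the reload path again, condensed and annotated. Every call is taken from the diff (the snippet assumes the enclosing Thumb2InstrInfo::loadRegFromStackSlot context and is not compilable on its own); the store path is symmetric, using t2STRDi8 and kill state instead of DefineNoRead. The new AddrModeT2_i8s4 case in rewriteT2FrameIndex is what later lets the frame index added by addFrameIndex(FI) be resolved for these doubleword accesses.

if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
  // t2LDRD encodes Rt and Rt2 in rGPR-only fields, so sp must never be
  // allocated to the high half of the pair; narrowing the register
  // class up front lets the allocator enforce that.
  MachineRegisterInfo *MRI = &MF.getRegInfo();
  MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);

  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
  AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); // Rt
  AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); // Rt2
  MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO); // [frame slot, #0]
  AddDefaultPred(MIB);

  // Defining the two halves does not mark a physical pair register as
  // live, so an implicit def of the whole pair keeps liveness correct.
  if (TargetRegisterInfo::isPhysicalRegister(DestReg))
    MIB.addReg(DestReg, RegState::ImplicitDefine);
  return;
}
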
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index d50f5d9..4795aae 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -926,13 +926,11 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
   HighLatencyCPSR = false;
 
   // Check predecessors for the latest CPSRDef.
-  bool HasBackEdges = false;
   for (MachineBasicBlock::pred_iterator
        I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) {
     const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()];
     if (!PInfo.Visited) {
       // Since blocks are visited in RPO, this must be a back-edge.
-      HasBackEdges = true;
       continue;
     }
     if (PInfo.HighLatencyCPSR) {
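
The deleted HasBackEdges flag was computed but never read, so the hunk above is a pure cleanup; the reverse-post-order reasoning in the surviving comment is what still matters. A sketch of that invariant follows, with illustrative structure rather than the pass's exact code:

// Blocks are visited in reverse post-order, so every predecessor that
// is not reached through a loop back-edge has already been processed;
// an unvisited predecessor can therefore only mean a back-edge.
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator
     MBBI = RPOT.begin(), ME = RPOT.end(); MBBI != ME; ++MBBI) {
  MachineBasicBlock *MBB = *MBBI;
  for (MachineBasicBlock::pred_iterator
       PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) {
    if (!BlockInfo[(*PI)->getNumber()].Visited) {
      // Back-edge: the predecessor's CPSR state is unknown here, so be
      // conservative and skip it, exactly as the loop above does.
      continue;
    }
    // ... merge the predecessor's CPSRDef / HighLatencyCPSR info ...
  }
  BlockInfo[MBB->getNumber()].Visited = true; // mark after processing
}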