Diffstat (limited to 'lib/Target/ARM')
 lib/Target/ARM/ARM.h                      |  10
 lib/Target/ARM/ARM.td                     |   8
 lib/Target/ARM/ARMCallingConv.td          |  60
 lib/Target/ARM/ARMISelLowering.cpp        |   7
 lib/Target/ARM/ARMInstrInfo.td            |   5
 lib/Target/ARM/ARMInstrThumb.td           |   3
 lib/Target/ARM/ARMInstrVFP.td             |   2
 lib/Target/ARM/ARMLoadStoreOptimizer.cpp  | 336
 lib/Target/ARM/ARMRegisterInfo.td         |  15
 lib/Target/ARM/ARMSubtarget.cpp           |   6
 lib/Target/ARM/ARMSubtarget.h             |   4
 lib/Target/ARM/ARMTargetMachine.cpp       |  13
 lib/Target/ARM/ARMTargetMachine.h         |   1
13 files changed, 429 insertions(+), 41 deletions(-)
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index ac7de91..7edd118 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -98,12 +98,12 @@ FunctionPass *createARMCodePrinterPass(raw_ostream &O,
 FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
                                        MachineCodeEmitter &MCE);
-FunctionPass *createARMCodeEmitterPass( ARMTargetMachine &TM,
-                                        MachineCodeEmitter &MCE);
-FunctionPass *createARMJITCodeEmitterPass( ARMTargetMachine &TM,
-                                           JITCodeEmitter &JCE);
+FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
+                                       MachineCodeEmitter &MCE);
+FunctionPass *createARMJITCodeEmitterPass(ARMTargetMachine &TM,
+                                          JITCodeEmitter &JCE);
 
-FunctionPass *createARMLoadStoreOptimizationPass();
+FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
 FunctionPass *createARMConstantIslandPass();
 
 } // end namespace llvm;
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 4ac6857..594811d 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -28,6 +28,8 @@ def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
                                 "ARM v5TE, v5TEj, v5TExp">;
 def ArchV6   : SubtargetFeature<"v6", "ARMArchVersion", "V6",
                                 "ARM v6">;
+def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
+                                "ARM v6t2">;
 def ArchV7A  : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
                                 "ARM v7A">;
 def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
@@ -92,9 +94,11 @@ def : Proc<"arm1176jzf-s",  [ArchV6, FeatureVFP2]>;
 def : Proc<"mpcorenovfp",   [ArchV6]>;
 def : Proc<"mpcore",        [ArchV6, FeatureVFP2]>;
 
-def : Proc<"arm1156t2-s",   [ArchV6, FeatureThumb2]>;
-def : Proc<"arm1156t2f-s",  [ArchV6, FeatureThumb2, FeatureVFP2]>;
+// V6T2 Processors.
+def : Proc<"arm1156t2-s",   [ArchV6T2, FeatureThumb2]>;
+def : Proc<"arm1156t2f-s",  [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
 
+// V7 Processors.
 def : Proc<"cortex-a8",     [ArchV7A, FeatureThumb2, FeatureNEON]>;
 def : Proc<"cortex-a9",     [ArchV7A, FeatureThumb2, FeatureNEON]>;
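The new ArchV6T2 feature slots between V6 and V7A so that architecture checks can stay simple ordered comparisons (see the ARMSubtarget.h hunk later in this patch). A minimal compilable sketch of that predicate pattern, with hypothetical struct and field names:

    #include <cassert>

    // Architecture versions in ascending order; a core at version N supports
    // the features of every version below it.
    enum ARMArchEnum { V4T, V5T, V5TE, V6, V6T2, V7A };

    struct SubtargetSketch {
      ARMArchEnum ARMArchVersion;
      // Each predicate is an ordering test, so inserting V6T2 between V6 and
      // V7A automatically makes V7A cores report hasV6T2Ops() == true.
      bool hasV6Ops() const   { return ARMArchVersion >= V6; }
      bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; }
      bool hasV7Ops() const   { return ARMArchVersion >= V7A; }
    };

    int main() {
      SubtargetSketch CortexA8{V7A};
      assert(CortexA8.hasV6T2Ops() && CortexA8.hasV7Ops());
      SubtargetSketch Arm1156{V6T2};
      assert(Arm1156.hasV6T2Ops() && !Arm1156.hasV7Ops());
    }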
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 6cd786e..f126760 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -17,6 +17,11 @@ class CCIfSubtarget<string F, CCAction A>:
 class CCIfAlign<string Align, CCAction A>:
   CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
 
+/// CCIfFloatABI - Match the float ABI currently in effect. ABIType may be
+/// "Hard" or "Soft".
+class CCIfFloatABI<string ABIType, CCAction A>:
+  CCIf<!strconcat("llvm::FloatABIType == llvm::FloatABI::", ABIType), A>;
+
 //===----------------------------------------------------------------------===//
 // ARM APCS Calling Convention
 //===----------------------------------------------------------------------===//
@@ -43,9 +48,10 @@ def RetCC_ARM_APCS : CallingConv<[
 ]>;
 
 //===----------------------------------------------------------------------===//
-// ARM AAPCS (EABI) Calling Convention
+// ARM AAPCS (EABI) Calling Convention, common parts
 //===----------------------------------------------------------------------===//
-def CC_ARM_AAPCS : CallingConv<[
+
+def CC_ARM_AAPCS_Common : CallingConv<[
 
   CCIfType<[i8, i16], CCPromoteToType<i32>>,
 
@@ -53,23 +59,51 @@ def CC_ARM_AAPCS : CallingConv<[
   // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
   // (and the same is true for f64 if VFP is not enabled)
   CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
-  CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
-
-  CCIfType<[f32], CCBitConvertToType<i32>>,
   CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&"
                        "ArgFlags.getOrigAlign() != 8",
                        CCAssignToReg<[R0, R1, R2, R3]>>>,
 
-  CCIfType<[i32], CCAssignToStack<4, 4>>,
+  CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
   CCIfType<[f64], CCAssignToStack<8, 8>>
 ]>;
 
-def RetCC_ARM_AAPCS : CallingConv<[
+def RetCC_ARM_AAPCS_Common : CallingConv<[
+  CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+  CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS : CallingConv<[
+  CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
   CCIfType<[f32], CCBitConvertToType<i32>>,
+  CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS : CallingConv<[
   CCIfType<[f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
+  CCIfType<[f32], CCBitConvertToType<i32>>,
+  CCDelegateTo<RetCC_ARM_AAPCS_Common>
+]>;
 
-  CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
-  CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+//===----------------------------------------------------------------------===//
+// ARM AAPCS-VFP (EABI) Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS_VFP : CallingConv<[
+  CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+                                 S9, S10, S11, S12, S13, S14, S15]>>,
+  CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS_VFP : CallingConv<[
+  CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+                                 S9, S10, S11, S12, S13, S14, S15]>>,
+  CCDelegateTo<RetCC_ARM_AAPCS_Common>
 ]>;
 
 //===----------------------------------------------------------------------===//
@@ -77,11 +111,19 @@ def RetCC_ARM_AAPCS : CallingConv<[
 //===----------------------------------------------------------------------===//
 
 def CC_ARM : CallingConv<[
+  CCIfSubtarget<"isAAPCS_ABI()",
+    CCIfSubtarget<"hasVFP2()",
+      CCIfFloatABI<"Hard",
+        CCDelegateTo<CC_ARM_AAPCS_VFP>>>>,
   CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<CC_ARM_AAPCS>>,
   CCDelegateTo<CC_ARM_APCS>
 ]>;
 
 def RetCC_ARM : CallingConv<[
+  CCIfSubtarget<"isAAPCS_ABI()",
+    CCIfSubtarget<"hasVFP2()",
+      CCIfFloatABI<"Hard",
+        CCDelegateTo<RetCC_ARM_AAPCS_VFP>>>>,
   CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<RetCC_ARM_AAPCS>>,
   CCDelegateTo<RetCC_ARM_APCS>
 ]>;
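For orientation, not part of the patch: CC_ARM_AAPCS_VFP tries D0-D7 for f64 and S0-S15 for f32, then falls through to the common AAPCS rules via CCDelegateTo. A rough C++ sketch of that delegation shape, with every name hypothetical and the S/D register overlap deliberately ignored:

    #include <cassert>
    #include <optional>
    #include <string>

    // Hypothetical stand-ins for the generated calling-convention logic.
    enum class ValType { i32, f32, f64 };

    struct CCStateSketch {
      unsigned NextSReg = 0, NextDReg = 0, NextGPR = 0;
    };

    // CC_ARM_AAPCS_Common equivalent: integer-like values land in r0-r3
    // (stack handling omitted for brevity).
    std::optional<std::string> assignCommon(CCStateSketch &S, ValType) {
      if (S.NextGPR < 4) return "r" + std::to_string(S.NextGPR++);
      return std::nullopt; // would go to the stack
    }

    // CC_ARM_AAPCS_VFP equivalent: FP values try the VFP banks first, then
    // delegate to the common rules. (Real AAPCS-VFP also accounts for the
    // overlap of S and D registers, which this sketch ignores.)
    std::optional<std::string> assignVFP(CCStateSketch &S, ValType VT) {
      if (VT == ValType::f64 && S.NextDReg < 8)
        return "d" + std::to_string(S.NextDReg++);
      if (VT == ValType::f32 && S.NextSReg < 16)
        return "s" + std::to_string(S.NextSReg++);
      return assignCommon(S, VT); // CCDelegateTo<CC_ARM_AAPCS_Common>
    }

    int main() {
      CCStateSketch S;
      assert(*assignVFP(S, ValType::f64) == "d0");
      assert(*assignVFP(S, ValType::f32) == "s0");
      assert(*assignVFP(S, ValType::i32) == "r0"); // falls through to common
    }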
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index c0fd9dc..ec8bd1f 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1101,7 +1101,12 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
       else
         RC = ARM::GPRRegisterClass;
 
-      if (RegVT == MVT::f64) {
+      if (FloatABIType == FloatABI::Hard) {
+        if (RegVT == MVT::f32)
+          RC = ARM::SPRRegisterClass;
+        else if (RegVT == MVT::f64)
+          RC = ARM::DPRRegisterClass;
+      } else if (RegVT == MVT::f64) {
         // f64 is passed in pairs of GPRs and must be combined.
         RegVT = MVT::i32;
       } else if (!((RegVT == MVT::i32) || (RegVT == MVT::f32)))
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 680e772..cc9f1a5 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -451,7 +451,7 @@ multiclass AsXI1_bin_c_irs<bits<4> opcod, string opc, PatFrag opnode> {
 /// the function.  The first operand is the ID# for this instruction, the second
 /// is the index into the MachineConstantPool that this is, the third is the
 /// size in bytes of this constant pool entry.
-let isNotDuplicable = 1 in
+let neverHasSideEffects = 1, isNotDuplicable = 1 in
 def CONSTPOOL_ENTRY :
 PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
                     i32imm:$size),
@@ -771,6 +771,7 @@ def STM : AXI4st<(outs),
 
 // Move Instructions.
 //
+let neverHasSideEffects = 1 in
 def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm,
                 "mov", " $dst, $src", []>, UnaryDP;
 def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
@@ -946,6 +947,7 @@ def MLA  : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
                    [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
 
 // Extra precision multiplies with low / high results
+let neverHasSideEffects = 1 in {
 def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
                     (ins GPR:$a, GPR:$b),
                     "smull", " $ldst, $hdst, $a, $b", []>;
@@ -967,6 +969,7 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst),
                     (ins GPR:$a, GPR:$b),
                     "umaal", " $ldst, $hdst, $a, $b", []>,
                     Requires<[IsARM, HasV6]>;
+} // neverHasSideEffects
 
 // Most significant word multiply
 def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index ffb83a8..54232f6 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -298,6 +298,7 @@ def tADDrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                 "add $dst, $lhs, $rhs",
                 [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>;
 
+let neverHasSideEffects = 1 in
 def tADDhirr : TIt<(outs tGPR:$dst), (ins GPR:$lhs, GPR:$rhs),
                    "add $dst, $rhs @ addhirr", []>;
 
@@ -387,6 +388,7 @@ def tMOVi8 : TI<(outs tGPR:$dst), (ins i32imm:$src),
 
 // Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy',
 // which is MOV(3).  This also supports high registers.
+let neverHasSideEffects = 1 in {
 def tMOVr : TI<(outs tGPR:$dst), (ins tGPR:$src),
                "cpy $dst, $src", []>;
 def tMOVhir2lor : TI<(outs tGPR:$dst), (ins GPR:$src),
@@ -395,6 +397,7 @@ def tMOVlor2hir : TI<(outs GPR:$dst), (ins tGPR:$src),
                      "cpy $dst, $src\t@ lor2hir", []>;
 def tMOVhir2hir : TI<(outs GPR:$dst), (ins GPR:$src),
                      "cpy $dst, $src\t@ hir2hir", []>;
+} // neverHasSideEffects
 
 def tMUL : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                "mul $dst, $rhs",
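The ARMISelLowering.cpp hunk above picks the SPR/DPR register classes for FP arguments only under a hard-float ABI; under soft float, f32 is bit-converted to i32 and f64 still travels as a GPR pair. A small sketch of that decision, with hypothetical names:

    #include <cassert>
    #include <string>

    enum class FloatABIKind { Soft, Hard };
    enum class MVTKind { i32, f32, f64 };

    // Mirrors the LowerFORMAL_ARGUMENTS change: hard-float FP arguments live
    // in VFP registers, so they need the SPR/DPR register classes.
    std::string pickRegClass(FloatABIKind ABI, MVTKind VT) {
      if (ABI == FloatABIKind::Hard) {
        if (VT == MVTKind::f32) return "SPR";
        if (VT == MVTKind::f64) return "DPR";
      }
      // Soft float: everything is carried in general-purpose registers.
      return "GPR";
    }

    int main() {
      assert(pickRegClass(FloatABIKind::Hard, MVTKind::f64) == "DPR");
      assert(pickRegClass(FloatABIKind::Soft, MVTKind::f64) == "GPR");
    }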
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 0247daf..9104c77 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -192,11 +192,13 @@ def FCVTSD : AI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
   let Inst{7-4}   = 0b1100;
 }
 
+let neverHasSideEffects = 1 in {
 def FCPYD  : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a),
                   "fcpyd", " $dst, $a", []>;
 def FCPYS  : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a),
                   "fcpys", " $dst, $a", []>;
+} // neverHasSideEffects
 
 def FNEGD  : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a),
                   "fnegd", " $dst, $a",
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 963ff0d..684ecb4 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -17,24 +17,31 @@
 #include "ARMAddressingModes.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMRegisterInfo.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
 STATISTIC(NumSTMGened , "Number of stm instructions generated");
 STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
 STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
+STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
+
+/// ARMAllocLoadStoreOpt - Post-register-allocation pass that combines
+/// load / store instructions to form ldm / stm instructions.
 
 namespace {
   struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
@@ -81,12 +88,6 @@ namespace {
   char ARMLoadStoreOpt::ID = 0;
 }
 
-/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
-/// optimization pass.
-FunctionPass *llvm::createARMLoadStoreOptimizationPass() {
-  return new ARMLoadStoreOpt();
-}
-
 static int getLoadStoreMultipleOpcode(int Opcode) {
   switch (Opcode) {
   case ARM::LDR:
@@ -582,6 +583,23 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
     RS->forward(prior(Loc));
 }
 
+static int getMemoryOpOffset(const MachineInstr *MI) {
+  int Opcode = MI->getOpcode();
+  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
+  unsigned NumOperands = MI->getDesc().getNumOperands();
+  unsigned OffField = MI->getOperand(NumOperands-3).getImm();
+  int Offset = isAM2
+    ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
+  if (isAM2) {
+    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
+      Offset = -Offset;
+  } else {
+    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
+      Offset = -Offset;
+  }
+  return Offset;
+}
+
 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
 /// ops of the same base and incrementing offset into LDM / STM ops.
 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
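What getMemoryOpOffset decodes: AM2 and AM5 immediates pack an offset magnitude plus an add/sub flag into a single field, with AM5 counting words (hence the * 4). The bit layout below is made up for illustration; only the decode pattern mirrors the patch:

    #include <cassert>

    // Illustrative immediate encoding: low 12 bits = magnitude, bit 12 = sub
    // flag. (ARM_AM uses its own layout; the decode pattern is the point.)
    enum AddSub { add = 0, sub = 1 };

    unsigned encodeOffset(AddSub Op, unsigned Magnitude) {
      return (unsigned(Op) << 12) | (Magnitude & 0xFFF);
    }
    AddSub getOp(unsigned Field) { return AddSub((Field >> 12) & 1); }
    unsigned getMagnitude(unsigned Field) { return Field & 0xFFF; }

    // Mirror of getMemoryOpOffset: recover a signed byte offset, scaling
    // word-granular (AM5-style) immediates by 4.
    int decodeOffset(unsigned Field, bool WordScaled) {
      int Offset = int(getMagnitude(Field)) * (WordScaled ? 4 : 1);
      return getOp(Field) == sub ? -Offset : Offset;
    }

    int main() {
      assert(decodeOffset(encodeOffset(sub, 8), false) == -8);
      assert(decodeOffset(encodeOffset(add, 2), true) == 8); // 2 words
    }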
@@ -606,22 +624,11 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
     bool isMemOp = isMemoryOp(MBBI);
     if (isMemOp) {
       int Opcode = MBBI->getOpcode();
-      bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
       unsigned Size = getLSMultipleTransferSize(MBBI);
       unsigned Base = MBBI->getOperand(1).getReg();
       unsigned PredReg = 0;
       ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
-      unsigned NumOperands = MBBI->getDesc().getNumOperands();
-      unsigned OffField = MBBI->getOperand(NumOperands-3).getImm();
-      int Offset = isAM2
-        ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
-      if (isAM2) {
-        if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
-          Offset = -Offset;
-      } else {
-        if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
-          Offset = -Offset;
-      }
+      int Offset = getMemoryOpOffset(MBBI);
       // Watch out for:
       // r4 := ldr [r5]
       // r5 := ldr [r5, #4]
@@ -744,6 +751,17 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
   return NumMerges > 0;
 }
 
+namespace {
+  struct OffsetCompare {
+    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+      int LOffset = getMemoryOpOffset(LHS);
+      int ROffset = getMemoryOpOffset(RHS);
+      assert(LHS == RHS || LOffset != ROffset);
+      return LOffset > ROffset;
+    }
+  };
+}
+
 /// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
 /// (bx lr) into the preceding stack restore so it directly restores the value
 /// of LR into pc.
@@ -788,3 +806,277 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   delete RS;
   return Modified;
 }
+
+
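A note on OffsetCompare: it hands std::sort a strict weak ordering over the memory ops of one base register, descending by offset, so RescheduleOps can pop candidates off the back in ascending-offset order. A standalone sketch with a stand-in instruction type:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Stand-in for a MachineInstr that only carries its decoded offset.
    struct MemOpSketch { int Offset; };

    struct OffsetCompareSketch {
      bool operator()(const MemOpSketch *L, const MemOpSketch *R) const {
        // Duplicate offsets per base were filtered out earlier, so the
        // ordering is strict, as the assert in the patch checks.
        return L->Offset > R->Offset;
      }
    };

    int main() {
      MemOpSketch A{8}, B{0}, C{4};
      std::vector<MemOpSketch*> Ops{&A, &B, &C};
      std::sort(Ops.begin(), Ops.end(), OffsetCompareSketch());
      // Reverse order: back() is now the op with the lowest offset.
      assert(Ops.back()->Offset == 0);
    }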
+/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
+/// loads / stores from consecutive locations closer together, so they are
+/// more likely to be combined later.
+
+namespace {
+  struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
+    static char ID;
+    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
+
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    MachineRegisterInfo *MRI;
+
+    virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+    virtual const char *getPassName() const {
+      return "ARM pre- register allocation load / store optimization pass";
+    }
+
+  private:
+    bool RescheduleOps(MachineBasicBlock *MBB,
+                       SmallVector<MachineInstr*, 4> &Ops,
+                       unsigned Base, bool isLd,
+                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
+    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
+  };
+  char ARMPreAllocLoadStoreOpt::ID = 0;
+}
+
+bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+  TII = Fn.getTarget().getInstrInfo();
+  TRI = Fn.getTarget().getRegisterInfo();
+  MRI = &Fn.getRegInfo();
+
+  bool Modified = false;
+  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+       ++MFI)
+    Modified |= RescheduleLoadStoreInstrs(MFI);
+
+  return Modified;
+}
+
+static bool IsSafeToMove(bool isLd, unsigned Base,
+                         MachineBasicBlock::iterator I,
+                         MachineBasicBlock::iterator E,
+                         SmallPtrSet<MachineInstr*, 4> MoveOps,
+                         const TargetRegisterInfo *TRI) {
+  // Are there stores / loads / calls between them?
+  // FIXME: This is overly conservative. We should make use of alias
+  // information some day.
+  while (++I != E) {
+    const TargetInstrDesc &TID = I->getDesc();
+    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
+      return false;
+    if (isLd && TID.mayStore())
+      return false;
+    if (!isLd) {
+      if (TID.mayLoad())
+        return false;
+      // It's not safe to move the first 'str' down.
+      // str r1, [r0]
+      // strh r5, [r0]
+      // str r4, [r0, #+4]
+      if (TID.mayStore() && !MoveOps.count(&*I))
+        return false;
+    }
+    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
+      MachineOperand &MO = I->getOperand(j);
+      if (MO.isReg() && MO.isDef() && TRI->regsOverlap(MO.getReg(), Base))
+        return false;
+    }
+  }
+  return true;
+}
+
+bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
+                                 SmallVector<MachineInstr*, 4> &Ops,
+                                 unsigned Base, bool isLd,
+                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
+  bool RetVal = false;
+
+  // Sort by offset (in reverse order).
+  std::sort(Ops.begin(), Ops.end(), OffsetCompare());
+
+  // The loads / stores of the same base are in order. Scan them from first to
+  // last and check for the following:
+  // 1. Any def of base.
+  // 2. Any gaps.
+  while (Ops.size() > 1) {
+    unsigned FirstLoc = ~0U;
+    unsigned LastLoc = 0;
+    MachineInstr *FirstOp = 0;
+    MachineInstr *LastOp = 0;
+    int LastOffset = 0;
+    unsigned LastBytes = 0;
+    unsigned NumMove = 0;
+    for (int i = Ops.size() - 1; i >= 0; --i) {
+      MachineInstr *Op = Ops[i];
+      unsigned Loc = MI2LocMap[Op];
+      if (Loc <= FirstLoc) {
+        FirstLoc = Loc;
+        FirstOp = Op;
+      }
+      if (Loc >= LastLoc) {
+        LastLoc = Loc;
+        LastOp = Op;
+      }
+
+      int Offset = getMemoryOpOffset(Op);
+      unsigned Bytes = getLSMultipleTransferSize(Op);
+      if (LastBytes) {
+        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
+          break;
+      }
+      LastOffset = Offset;
+      LastBytes = Bytes;
+      if (++NumMove == 4)
+        break;
+    }
+
+    if (NumMove <= 1)
+      Ops.pop_back();
+    else {
+      SmallPtrSet<MachineInstr*, 4> MoveOps;
+      for (int i = NumMove-1; i >= 0; --i)
+        MoveOps.insert(Ops[i]);
+
+      // Be conservative: if the instructions are too far apart, don't
+      // move them. We want to limit the increase of register pressure.
+      bool DoMove = (LastLoc - FirstLoc) < NumMove*4;
+      if (DoMove)
+        DoMove = IsSafeToMove(isLd, Base, FirstOp, LastOp, MoveOps, TRI);
+      if (!DoMove) {
+        for (unsigned i = 0; i != NumMove; ++i)
+          Ops.pop_back();
+      } else {
+        // This is the new location for the loads / stores.
+        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
+        while (InsertPos != MBB->end() && MoveOps.count(InsertPos))
+          ++InsertPos;
+        for (unsigned i = 0; i != NumMove; ++i) {
+          MachineInstr *Op = Ops.back();
+          Ops.pop_back();
+          MBB->splice(InsertPos, MBB, Op);
+        }
+
+        NumLdStMoved += NumMove;
+        RetVal = true;
+      }
+    }
+  }
+
+  return RetVal;
+}
+
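The inner loop's gap test above is the key invariant: a run is only mergeable if every access has the same size and the offsets are strictly consecutive, capped at four ops. The same check in isolation, with a hypothetical access type:

    #include <cassert>
    #include <vector>

    struct AccessSketch { int Offset; unsigned Bytes; };

    // Mirrors the inner loop's gap test: walk accesses in increasing-offset
    // order and count how many form one contiguous, uniformly-sized run.
    unsigned contiguousRun(const std::vector<AccessSketch> &Ops) {
      unsigned NumMove = 0;
      int LastOffset = 0;
      unsigned LastBytes = 0;
      for (const AccessSketch &Op : Ops) {
        if (LastBytes &&
            (Op.Bytes != LastBytes || Op.Offset != LastOffset + (int)LastBytes))
          break; // size change or a hole: stop the run
        LastOffset = Op.Offset;
        LastBytes = Op.Bytes;
        if (++NumMove == 4) // formation is capped at 4 ops in the patch
          break;
      }
      return NumMove;
    }

    int main() {
      assert(contiguousRun({{0,4},{4,4},{8,4}}) == 3); // ldm-able run
      assert(contiguousRun({{0,4},{8,4}}) == 1);       // hole at offset 4
    }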
+bool
+ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
+  bool RetVal = false;
+
+  DenseMap<MachineInstr*, unsigned> MI2LocMap;
+  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
+  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
+  SmallVector<unsigned, 4> LdBases;
+  SmallVector<unsigned, 4> StBases;
+
+  unsigned Loc = 0;
+  MachineBasicBlock::iterator MBBI = MBB->begin();
+  MachineBasicBlock::iterator E = MBB->end();
+  while (MBBI != E) {
+    for (; MBBI != E; ++MBBI) {
+      MachineInstr *MI = MBBI;
+      const TargetInstrDesc &TID = MI->getDesc();
+      if (TID.isCall() || TID.isTerminator()) {
+        // Stop at barriers.
+        ++MBBI;
+        break;
+      }
+
+      MI2LocMap[MI] = Loc++;
+      if (!isMemoryOp(MI))
+        continue;
+      unsigned PredReg = 0;
+      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
+        continue;
+
+      int Opcode = MI->getOpcode();
+      bool isLd = Opcode == ARM::LDR ||
+                  Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+      unsigned Base = MI->getOperand(1).getReg();
+      int Offset = getMemoryOpOffset(MI);
+
+      bool StopHere = false;
+      if (isLd) {
+        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
+          Base2LdsMap.find(Base);
+        if (BI != Base2LdsMap.end()) {
+          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+            if (Offset == getMemoryOpOffset(BI->second[i])) {
+              StopHere = true;
+              break;
+            }
+          }
+          if (!StopHere)
+            BI->second.push_back(MI);
+        } else {
+          SmallVector<MachineInstr*, 4> MIs;
+          MIs.push_back(MI);
+          Base2LdsMap[Base] = MIs;
+          LdBases.push_back(Base);
+        }
+      } else {
+        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
+          Base2StsMap.find(Base);
+        if (BI != Base2StsMap.end()) {
+          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+            if (Offset == getMemoryOpOffset(BI->second[i])) {
+              StopHere = true;
+              break;
+            }
+          }
+          if (!StopHere)
+            BI->second.push_back(MI);
+        } else {
+          SmallVector<MachineInstr*, 4> MIs;
+          MIs.push_back(MI);
+          Base2StsMap[Base] = MIs;
+          StBases.push_back(Base);
+        }
+      }
+
+      if (StopHere) {
+        // Found a duplicate (a base+offset combination that was seen
+        // earlier). Backtrack.
+        --Loc;
+        break;
+      }
+    }
+
+    // Re-schedule loads.
+    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
+      unsigned Base = LdBases[i];
+      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
+      if (Lds.size() > 1)
+        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
+    }
+
+    // Re-schedule stores.
+    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
+      unsigned Base = StBases[i];
+      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
+      if (Sts.size() > 1)
+        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
+    }
+
+    if (MBBI != E) {
+      Base2LdsMap.clear();
+      Base2StsMap.clear();
+      LdBases.clear();
+      StBases.clear();
+    }
+  }
+
+  return RetVal;
+}
+
+
+/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
+/// optimization pass.
+FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
+  if (PreAlloc)
+    return new ARMPreAllocLoadStoreOpt();
+  return new ARMLoadStoreOpt();
+}
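A sketch of the bookkeeping shape in RescheduleLoadStoreInstrs: ops are bucketed by base register, and a repeated base+offset pair stops the scan, mirroring the StopHere logic (std::map stands in for DenseMap; all types hypothetical):

    #include <cassert>
    #include <map>
    #include <vector>

    struct OpSketch { unsigned Base; int Offset; };

    // Group ops by base register; a repeated (base, offset) pair acts as a
    // barrier, as it does in the pass.
    bool groupByBase(const std::vector<OpSketch> &Ops,
                     std::map<unsigned, std::vector<OpSketch>> &Groups) {
      for (const OpSketch &Op : Ops) {
        std::vector<OpSketch> &Bucket = Groups[Op.Base];
        for (const OpSketch &Seen : Bucket)
          if (Seen.Offset == Op.Offset)
            return false; // duplicate offset for this base: stop here
        Bucket.push_back(Op);
      }
      return true;
    }

    int main() {
      std::map<unsigned, std::vector<OpSketch>> Groups;
      assert(groupByBase({{0, 0}, {1, 0}, {0, 4}}, Groups));
      assert(Groups[0].size() == 2 && Groups[1].size() == 1);
      Groups.clear();
      assert(!groupByBase({{0, 0}, {0, 0}}, Groups)); // same base+offset twice
    }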
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index b95d1f9..ebe7d58 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -219,3 +219,18 @@ def DPR : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8,
 
 // Condition code registers.
 def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>;
+
+//===----------------------------------------------------------------------===//
+// Subregister Set Definitions... now that we have all of the pieces, define the
+// sub registers for each register.
+//
+
+def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7,
+                    D8, D9, D10, D11, D12, D13, D14, D15],
+                   [S0, S2, S4, S6, S8, S10, S12, S14,
+                    S16, S18, S20, S22, S24, S26, S28, S30]>;
+
+def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7,
+                    D8, D9, D10, D11, D12, D13, D14, D15],
+                   [S1, S3, S5, S7, S9, S11, S13, S15,
+                    S17, S19, S21, S23, S25, S27, S29, S31]>;
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index ef78cd5..a978380 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -14,6 +14,8 @@
 #include "ARMSubtarget.h"
 #include "ARMGenSubtarget.inc"
 #include "llvm/Module.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
 ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
@@ -28,6 +30,10 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
   , CPUString("generic")
   , TargetType(isELF) // Default to ELF unless otherwise specified.
   , TargetABI(ARM_ABI_APCS) {
+  // default to soft float ABI
+  if (FloatABIType == FloatABI::Default)
+    FloatABIType = FloatABI::Soft;
+
   // Determine default and user specified characteristics
 
   // Parse features string.
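The two SubRegSet lists encode a fixed aliasing rule: D<i> overlaps S<2i> (subreg index 1) and S<2i+1> (subreg index 2). A tiny check of that arithmetic:

    #include <cassert>

    // Subreg index 1 of D<i> is S<2i>; subreg index 2 is S<2i+1>,
    // matching the two SubRegSet tables above.
    unsigned sregOfDreg(unsigned DregNo, unsigned SubIdx) {
      assert(DregNo < 16 && (SubIdx == 1 || SubIdx == 2));
      return 2 * DregNo + (SubIdx - 1);
    }

    int main() {
      assert(sregOfDreg(0, 1) == 0);   // D0 -> S0
      assert(sregOfDreg(0, 2) == 1);   // D0 -> S1
      assert(sregOfDreg(7, 2) == 15);  // D7 -> S15, the last f32 arg register
    }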
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 8b469cf..0704055 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -23,7 +23,7 @@ class Module;
 class ARMSubtarget : public TargetSubtarget {
 protected:
   enum ARMArchEnum {
-    V4T, V5T, V5TE, V6, V7A
+    V4T, V5T, V5TE, V6, V6T2, V7A
   };
 
   enum ARMFPEnum {
@@ -92,6 +92,7 @@ protected:
   bool hasV5TOps()  const { return ARMArchVersion >= V5T;  }
   bool hasV5TEOps() const { return ARMArchVersion >= V5TE; }
   bool hasV6Ops()   const { return ARMArchVersion >= V6;   }
+  bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; }
   bool hasV7Ops()   const { return ARMArchVersion >= V7A;  }
 
   bool hasVFP2() const { return ARMFPUType >= VFPv2; }
@@ -105,6 +106,7 @@ protected:
   bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
 
   bool isThumb() const { return IsThumb; }
+  bool isThumb1() const { return IsThumb && (ThumbMode == Thumb1); }
   bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); }
 
   bool useThumbBacktraces() const { return UseThumbBacktraces; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 1dc7d19..7033907 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -23,6 +23,9 @@
 #include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
+static cl::opt<bool>
+EnablePreLdStOpti("arm-pre-alloc-loadstore-opti", cl::Hidden,
+                  cl::desc("Enable pre-regalloc load store optimization pass"));
 static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden,
                               cl::desc("Disable load store optimization pass"));
 static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden,
@@ -144,6 +147,16 @@ bool ARMTargetMachine::addInstSelector(PassManagerBase &PM,
   return false;
 }
 
+bool ARMTargetMachine::addPreRegAlloc(PassManagerBase &PM,
+                                      CodeGenOpt::Level OptLevel) {
+  if (!EnablePreLdStOpti)
+    return false;
+  // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
+  if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
+    PM.add(createARMLoadStoreOptimizationPass(true));
+  return true;
+}
+
 bool ARMTargetMachine::addPreEmitPass(PassManagerBase &PM,
                                       CodeGenOpt::Level OptLevel) {
   // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 916a8aa..7192c1b 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -71,6 +71,7 @@ public:
 
   // Pass Pipeline Configuration
   virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+  virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addAssemblyEmitter(PassManagerBase &PM,
                                   CodeGenOpt::Level OptLevel,
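Last, a sketch of the gating in addPreRegAlloc: the new pre-regalloc pass sits behind a hidden, off-by-default cl::opt flag and is skipped for Thumb and at -O0. The pipeline type and names below are hypothetical, and the hook's exact return-value convention is glossed over:

    #include <functional>
    #include <vector>

    // Minimal pipeline sketch: hooks append passes to a list.
    struct PipelineSketch {
      std::vector<std::function<void()>> Passes;
      void add(std::function<void()> P) { Passes.push_back(std::move(P)); }
    };

    bool EnablePreLdStOptiFlag = false; // mirrors the hidden, default-off flag

    bool addPreRegAllocSketch(PipelineSketch &PM, bool Optimizing, bool IsThumb) {
      if (!EnablePreLdStOptiFlag)
        return false;                // opted out: pipeline untouched
      if (Optimizing && !IsThumb)    // Thumb support not wired up yet
        PM.add([] { /* run pre-regalloc load / store optimization */ });
      return true;
    }

    int main() {
      PipelineSketch PM;
      addPreRegAllocSketch(PM, true, false); // flag off: adds nothing
      EnablePreLdStOptiFlag = true;
      addPreRegAllocSketch(PM, true, false); // now the pass is scheduled
      return PM.Passes.size() == 1 ? 0 : 1;
    }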