diff options
Diffstat (limited to 'lib/Target/ARM')
23 files changed, 1168 insertions, 233 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 594811d..9001e50 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -45,62 +45,72 @@ def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2", // ARM Processors supported. // -class Proc<string Name, list<SubtargetFeature> Features> - : Processor<Name, NoItineraries, Features>; +include "ARMSchedule.td" + +class ProcNoItin<string Name, list<SubtargetFeature> Features> + : Processor<Name, GenericItineraries, Features>; // V4 Processors. -def : Proc<"generic", []>; -def : Proc<"arm8", []>; -def : Proc<"arm810", []>; -def : Proc<"strongarm", []>; -def : Proc<"strongarm110", []>; -def : Proc<"strongarm1100", []>; -def : Proc<"strongarm1110", []>; +def : ProcNoItin<"generic", []>; +def : ProcNoItin<"arm8", []>; +def : ProcNoItin<"arm810", []>; +def : ProcNoItin<"strongarm", []>; +def : ProcNoItin<"strongarm110", []>; +def : ProcNoItin<"strongarm1100", []>; +def : ProcNoItin<"strongarm1110", []>; // V4T Processors. -def : Proc<"arm7tdmi", [ArchV4T]>; -def : Proc<"arm7tdmi-s", [ArchV4T]>; -def : Proc<"arm710t", [ArchV4T]>; -def : Proc<"arm720t", [ArchV4T]>; -def : Proc<"arm9", [ArchV4T]>; -def : Proc<"arm9tdmi", [ArchV4T]>; -def : Proc<"arm920", [ArchV4T]>; -def : Proc<"arm920t", [ArchV4T]>; -def : Proc<"arm922t", [ArchV4T]>; -def : Proc<"arm940t", [ArchV4T]>; -def : Proc<"ep9312", [ArchV4T]>; +def : ProcNoItin<"arm7tdmi", [ArchV4T]>; +def : ProcNoItin<"arm7tdmi-s", [ArchV4T]>; +def : ProcNoItin<"arm710t", [ArchV4T]>; +def : ProcNoItin<"arm720t", [ArchV4T]>; +def : ProcNoItin<"arm9", [ArchV4T]>; +def : ProcNoItin<"arm9tdmi", [ArchV4T]>; +def : ProcNoItin<"arm920", [ArchV4T]>; +def : ProcNoItin<"arm920t", [ArchV4T]>; +def : ProcNoItin<"arm922t", [ArchV4T]>; +def : ProcNoItin<"arm940t", [ArchV4T]>; +def : ProcNoItin<"ep9312", [ArchV4T]>; // V5T Processors. 
-def : Proc<"arm10tdmi", [ArchV5T]>; -def : Proc<"arm1020t", [ArchV5T]>; +def : ProcNoItin<"arm10tdmi", [ArchV5T]>; +def : ProcNoItin<"arm1020t", [ArchV5T]>; // V5TE Processors. -def : Proc<"arm9e", [ArchV5TE]>; -def : Proc<"arm926ej-s", [ArchV5TE]>; -def : Proc<"arm946e-s", [ArchV5TE]>; -def : Proc<"arm966e-s", [ArchV5TE]>; -def : Proc<"arm968e-s", [ArchV5TE]>; -def : Proc<"arm10e", [ArchV5TE]>; -def : Proc<"arm1020e", [ArchV5TE]>; -def : Proc<"arm1022e", [ArchV5TE]>; -def : Proc<"xscale", [ArchV5TE]>; -def : Proc<"iwmmxt", [ArchV5TE]>; +def : ProcNoItin<"arm9e", [ArchV5TE]>; +def : ProcNoItin<"arm926ej-s", [ArchV5TE]>; +def : ProcNoItin<"arm946e-s", [ArchV5TE]>; +def : ProcNoItin<"arm966e-s", [ArchV5TE]>; +def : ProcNoItin<"arm968e-s", [ArchV5TE]>; +def : ProcNoItin<"arm10e", [ArchV5TE]>; +def : ProcNoItin<"arm1020e", [ArchV5TE]>; +def : ProcNoItin<"arm1022e", [ArchV5TE]>; +def : ProcNoItin<"xscale", [ArchV5TE]>; +def : ProcNoItin<"iwmmxt", [ArchV5TE]>; // V6 Processors. -def : Proc<"arm1136j-s", [ArchV6]>; -def : Proc<"arm1136jf-s", [ArchV6, FeatureVFP2]>; -def : Proc<"arm1176jz-s", [ArchV6]>; -def : Proc<"arm1176jzf-s", [ArchV6, FeatureVFP2]>; -def : Proc<"mpcorenovfp", [ArchV6]>; -def : Proc<"mpcore", [ArchV6, FeatureVFP2]>; +def : Processor<"arm1136j-s", V6Itineraries, + [ArchV6]>; +def : Processor<"arm1136jf-s", V6Itineraries, + [ArchV6, FeatureVFP2]>; +def : Processor<"arm1176jz-s", V6Itineraries, + [ArchV6]>; +def : Processor<"arm1176jzf-s", V6Itineraries, + [ArchV6, FeatureVFP2]>; +def : Processor<"mpcorenovfp", V6Itineraries, + [ArchV6]>; +def : Processor<"mpcore", V6Itineraries, + [ArchV6, FeatureVFP2]>; // V6T2 Processors. -def : Proc<"arm1156t2-s", [ArchV6T2, FeatureThumb2]>; -def : Proc<"arm1156t2f-s", [ArchV6T2, FeatureThumb2, FeatureVFP2]>; +def : Processor<"arm1156t2-s", V6Itineraries, + [ArchV6T2, FeatureThumb2]>; +def : Processor<"arm1156t2f-s", V6Itineraries, + [ArchV6T2, FeatureThumb2, FeatureVFP2]>; // V7 Processors. 
-def : Proc<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>; -def : Proc<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>; +def : ProcNoItin<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>; +def : ProcNoItin<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index f126760..47151e6 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -17,11 +17,6 @@ class CCIfSubtarget<string F, CCAction A>: class CCIfAlign<string Align, CCAction A>: CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>; -/// CCIfFloatABI - Match of the float ABI and the arg. ABIType may be "Hard" or -/// "Soft". -class CCIfFloatABI<string ABIType, CCAction A>: - CCIf<!strconcat("llvm::FloatABIType == llvm::FloatABI::", ABIType), A>; - //===----------------------------------------------------------------------===// // ARM APCS Calling Convention //===----------------------------------------------------------------------===// @@ -105,25 +100,3 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[ S9, S10, S11, S12, S13, S14, S15]>>, CCDelegateTo<RetCC_ARM_AAPCS_Common> ]>; - -//===----------------------------------------------------------------------===// -// ARM Calling Convention Dispatch -//===----------------------------------------------------------------------===// - -def CC_ARM : CallingConv<[ - CCIfSubtarget<"isAAPCS_ABI()", - CCIfSubtarget<"hasVFP2()", - CCIfFloatABI<"Hard", - CCDelegateTo<CC_ARM_AAPCS_VFP>>>>, - CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<CC_ARM_AAPCS>>, - CCDelegateTo<CC_ARM_APCS> -]>; - -def RetCC_ARM : CallingConv<[ - CCIfSubtarget<"isAAPCS_ABI()", - CCIfSubtarget<"hasVFP2()", - CCIfFloatABI<"Hard", - CCDelegateTo<RetCC_ARM_AAPCS_VFP>>>>, - CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<RetCC_ARM_AAPCS>>, - CCDelegateTo<RetCC_ARM_APCS> -]>; diff 
--git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 44fac12..f6629fe 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -900,6 +900,10 @@ void Emitter<CodeEmitter>::emitMiscLoadStoreInstruction(const MachineInstr &MI, // Set first operand Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + // Skip LDRD and STRD's second operand. + if (TID.Opcode == ARM::LDRD || TID.Opcode == ARM::STRD) + ++OpIdx; + // Set second operand if (ImplicitRn) // Special handling for implicit use (e.g. PC). diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index ca3a9cb..1ed9e80 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -52,8 +52,13 @@ public: virtual const char *getPassName() const { return "ARM Instruction Selection"; - } - + } + + /// getI32Imm - Return a target constant with the specified value, of type i32. + inline SDValue getI32Imm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); + } + SDNode *Select(SDValue Op); virtual void InstructionSelect(); bool SelectAddrMode2(SDValue Op, SDValue N, SDValue &Base, @@ -84,6 +89,9 @@ public: bool SelectThumbAddrModeSP(SDValue Op, SDValue N, SDValue &Base, SDValue &OffImm); + bool SelectShifterOperand(SDValue Op, SDValue N, + SDValue &BaseReg, SDValue &Opc); + bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A, SDValue &B, SDValue &C); @@ -509,8 +517,30 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N, return false; } +bool ARMDAGToDAGISel::SelectShifterOperand(SDValue Op, + SDValue N, + SDValue &BaseReg, + SDValue &Opc) { + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + + // Don't match base register only case. That is matched to a separate + // lower complexity pattern with explicit register operand. 
+ if (ShOpcVal == ARM_AM::no_shift) return false; + + BaseReg = N.getOperand(0); + unsigned ShImmVal = 0; + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) + ShImmVal = RHS->getZExtValue() & 31; + else + return false; + + Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal)); + + return true; +} + bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op, - SDValue N, + SDValue N, SDValue &BaseReg, SDValue &ShReg, SDValue &Opc) { @@ -549,6 +579,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { switch (N->getOpcode()) { default: break; case ISD::Constant: { + // ARMv6T2 and later should materialize imms via MOV / MOVT pair. + if (Subtarget->hasV6T2Ops() || Subtarget->hasThumb2()) + break; + unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); bool UseCP = true; if (Subtarget->isThumb()) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ec8bd1f..2443625 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -292,6 +292,25 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10); setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2); + if (!Subtarget->isThumb()) { + // Use branch latency information to determine if-conversion limits. + // FIXME: If-converter should use instruction latency of the branch being + // eliminated to compute the threshold. For ARMv6, the branch "latency" + // varies depending on whether it's dynamically or statically predicted + // and on whether the destination is in the prefetch buffer. 
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const InstrItineraryData &InstrItins = Subtarget->getInstrItineraryData(); + unsigned Latency= InstrItins.getLatency(TII->get(ARM::Bcc).getSchedClass()); + if (Latency > 1) { + setIfCvtBlockSizeLimit(Latency-1); + if (Latency > 2) + setIfCvtDupBlockSizeLimit(Latency-2); + } else { + setIfCvtBlockSizeLimit(10); + setIfCvtDupBlockSizeLimit(2); + } + } + maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type // Do not enable CodePlacementOpt for now: it currently runs after the // ARMConstantIslandPass and messes up branch relaxation and placement @@ -415,7 +434,7 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, ARM::NoRegister }; unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 4); - if (Reg == 0) + if (Reg == 0) return false; // we didn't handle it unsigned i; @@ -487,6 +506,33 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, State); } +/// CCAssignFnForNode - Selects the correct CCAssignFn for the +/// given CallingConvention value. +CCAssignFn *ARMTargetLowering::CCAssignFnForNode(unsigned CC, + bool Return) const { + switch (CC) { + default: + assert(0 && "Unsupported calling convention"); + case CallingConv::C: + case CallingConv::Fast: + // Use target triple & subtarget features to do actual dispatch. + if (Subtarget->isAAPCS_ABI()) { + if (Subtarget->hasVFP2() && + FloatABIType == FloatABI::Hard) + return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); + else + return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); + } else + return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); + case CallingConv::ARM_AAPCS_VFP: + return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); + case CallingConv::ARM_AAPCS: + return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); + case CallingConv::ARM_APCS: + return (Return ? 
RetCC_ARM_APCS: CC_ARM_APCS); + } +} + /// LowerCallResult - Lower the result values of an ISD::CALL into the /// appropriate copies out of appropriate physical registers. This assumes that /// Chain/InFlag are the input chain/flag to use, and that TheCall is the call @@ -501,7 +547,8 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, SmallVector<CCValAssign, 16> RVLocs; bool isVarArg = TheCall->isVarArg(); CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs); - CCInfo.AnalyzeCallResult(TheCall, RetCC_ARM); + CCInfo.AnalyzeCallResult(TheCall, + CCAssignFnForNode(CallingConv, /* Return*/ true)); SmallVector<SDValue, 8> ResultVals; @@ -586,8 +633,6 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { MVT RetVT = TheCall->getRetValType(0); SDValue Chain = TheCall->getChain(); unsigned CC = TheCall->getCallingConv(); - assert((CC == CallingConv::C || - CC == CallingConv::Fast) && "unknown calling convention"); bool isVarArg = TheCall->isVarArg(); SDValue Callee = TheCall->getCallee(); DebugLoc dl = TheCall->getDebugLoc(); @@ -595,7 +640,7 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); - CCInfo.AnalyzeCallOperands(TheCall, CC_ARM); + CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC, /* Return*/ false)); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); @@ -788,7 +833,7 @@ SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); // Analyze return values of ISD::RET. 
- CCInfo.AnalyzeReturn(Op.getNode(), RetCC_ARM); + CCInfo.AnalyzeReturn(Op.getNode(), CCAssignFnForNode(CC, /* Return */ true)); // If this is the first return lowered for this function, add // the regs to the liveout set for the function. @@ -1085,7 +1130,8 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); - CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_ARM); + CCInfo.AnalyzeFormalArguments(Op.getNode(), + CCAssignFnForNode(CC, /* Return*/ false)); SmallVector<SDValue, 16> ArgValues; @@ -1456,7 +1502,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = (Subtarget->isThumb() || Subtarget->useThumbBacktraces()) + unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) ? 
ARM::R7 : ARM::R11; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 2dab2db..8f53e39 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -151,6 +151,7 @@ namespace llvm { /// unsigned ARMPCLabelIndex; + CCAssignFn *CCAssignFnForNode(unsigned CC, bool Return) const; SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, const SDValue &StackPtr, const CCValAssign &VA, SDValue Chain, SDValue Arg, ISD::ArgFlagsTy Flags); diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 4b0dbb5..d19fb8e 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -697,7 +697,6 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc()); - MBB.insert(MI, PopMI); for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); if (Reg == ARM::LR) { @@ -706,10 +705,15 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, continue; Reg = ARM::PC; PopMI->setDesc(get(ARM::tPOP_RET)); - MBB.erase(MI); + MI = MBB.erase(MI); } PopMI->addOperand(MachineOperand::CreateReg(Reg, true)); } + + // It's illegal to emit pop instruction without operands. + if (PopMI->getNumOperands() > 0) + MBB.insert(MI, PopMI); + return true; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index cc9f1a5..4707e3b 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -90,12 +90,12 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>; //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. 
// -def HasV5T : Predicate<"Subtarget->hasV5TOps()">; -def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; -def HasV6 : Predicate<"Subtarget->hasV6Ops()">; -def IsThumb : Predicate<"Subtarget->isThumb()">; -def IsThumb2 : Predicate<"Subtarget->isThumb2()">; -def IsARM : Predicate<"!Subtarget->isThumb()">; +def HasV5T : Predicate<"Subtarget->hasV5TOps()">; +def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; +def HasV6 : Predicate<"Subtarget->hasV6Ops()">; +def IsThumb : Predicate<"Subtarget->isThumb()">; +def HasThumb2 : Predicate<"Subtarget->hasThumb2()">; +def IsARM : Predicate<"!Subtarget->isThumb()">; //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -539,7 +539,7 @@ let isReturn = 1, isTerminator = 1 in LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1", []>; -let isCall = 1, +let isCall = 1, Itinerary = IIC_Br, Defs = [R0, R1, R2, R3, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, CPSR] in { def BL : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops), @@ -567,7 +567,7 @@ let isCall = 1, } } -let isBranch = 1, isTerminator = 1 in { +let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in { // B is "predicable" since it can be xformed into a Bcc. 
let isBarrier = 1 in { let isPredicable = 1 in @@ -647,9 +647,8 @@ def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, let mayLoad = 1 in { // Load doubleword -def LDRD : AI3ldd<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - "ldr", "d $dst, $addr", - []>, Requires<[IsARM, HasV5T]>; +def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm, + "ldr", "d $dst1, $addr", []>, Requires<[IsARM, HasV5T]>; // Indexed loads def LDR_PRE : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb), @@ -709,9 +708,8 @@ def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, // Store doubleword let mayStore = 1 in -def STRD : AI3std<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, - "str", "d $src, $addr", - []>, Requires<[IsARM, HasV5T]>; +def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),StMiscFrm, + "str", "d $src1, $addr", []>, Requires<[IsARM, HasV5T]>; // Indexed stores def STR_PRE : AI2stwpr<(outs GPR:$base_wb), @@ -1387,6 +1385,12 @@ def : ARMV5TEPat<(add GPR:$acc, include "ARMInstrThumb.td" //===----------------------------------------------------------------------===// +// Thumb2 Support +// + +include "ARMInstrThumb2.td" + +//===----------------------------------------------------------------------===// // Floating Point Support // diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 54232f6..9297f08 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -277,6 +277,7 @@ def tPUSH : TI<(outs), (ins reglist:$src1, variable_ops), // // Add with carry +let isCommutable = 1 in def tADC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "adc $dst, $rhs", [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>; @@ -311,6 +312,7 @@ def tADDrSPi : TI<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), "add $dst, $rhs * 4", []>; +let isCommutable = 1 in def tAND : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, 
tGPR:$rhs), "and $dst, $rhs", [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>; @@ -358,6 +360,7 @@ def tCMPNZr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs), // TODO: A7-37: CMP(3) - cmp hi regs +let isCommutable = 1 in def tEOR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "eor $dst, $rhs", [(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>; @@ -399,6 +402,7 @@ def tMOVhir2hir : TI<(outs GPR:$dst), (ins GPR:$src), "cpy $dst, $src\t@ hir2hir", []>; } // neverHasSideEffects +let isCommutable = 1 in def tMUL : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "mul $dst, $rhs", [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>; @@ -411,6 +415,7 @@ def tNEG : TI<(outs tGPR:$dst), (ins tGPR:$src), "neg $dst, $src", [(set tGPR:$dst, (ineg tGPR:$src))]>; +let isCommutable = 1 in def tORR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "orr $dst, $rhs", [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 168fb45..07c71da 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -10,3 +10,199 @@ // This file describes the Thumb2 instruction set. // //===----------------------------------------------------------------------===// + +// Shifted operands. No register controlled shifts for Thumb2. +// Note: We do not support rrx shifted operands yet. +def t2_so_reg : Operand<i32>, // reg imm + ComplexPattern<i32, 2, "SelectShifterOperand", + [shl,srl,sra,rotr]> { + let PrintMethod = "printSOOperand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +def LO16 : SDNodeXForm<imm, [{ + // Transformation function: truncate the immediate value to its low 16 bits. + return getI32Imm((unsigned short)N->getZExtValue()); +}]>; + +def HI16 : SDNodeXForm<imm, [{ + // Transformation function: shift the immediate value down into the low bits. 
+ return getI32Imm((unsigned)N->getZExtValue() >> 16); +}]>; + +def imm16high : PatLeaf<(i32 imm), [{ + // Returns true if all bits out of the [31..16] range are 0. + return ((N->getZExtValue() & 0xFFFF0000ULL) == N->getZExtValue()); +}], HI16>; + +def imm16high0xffff : PatLeaf<(i32 imm), [{ + // Returns true if lo 16 bits are set and this is a 32-bit value. + return ((N->getZExtValue() & 0x0000FFFFULL) == 0xFFFFULL); +}], HI16>; + +def imm0_4095 : PatLeaf<(i32 imm), [{ + return (uint32_t)N->getZExtValue() < 4096; +}]>; + +def imm0_4095_neg : PatLeaf<(i32 imm), [{ + return (uint32_t)-N->getZExtValue() < 4096; +}], imm_neg_XFORM>; + +def imm0_65535 : PatLeaf<(i32 imm), [{ + return N->getZExtValue() < 65536; +}]>; + +// A6.3.2 Modified immediate constants in Thumb instructions (#<const>) +// FIXME: Move it to the addrmode matcher code. +def t2_so_imm : PatLeaf<(i32 imm), [{ + uint64_t v = N->getZExtValue(); + if (v == 0 || v > 0xffffffffUL) return false; + // variant1 - 0b0000x - 8-bit which could be zero (not supported for now) + + // variant2 - 0b00nnx - 8-bit repeated inside the 32-bit room + unsigned hi16 = (unsigned)(v >> 16); + unsigned lo16 = (unsigned)(v & 0xffffUL); + bool valid = (hi16 == lo16) && ( + (v & 0x00ff00ffUL) == 0 || // type 0001x + (v & 0xff00ff00UL) == 0 || // type 0010x + ((lo16 >> 8) == (lo16 & 0xff))); // type 0011x + if (valid) return true; + + // variant3 - 0b01000..0b11111 - 8-bit shifted inside the 32-bit room + unsigned shift = CountLeadingZeros_32(v); + uint64_t mask = (0xff000000ULL >> shift); + // If valid, it is type 01000 + shift + return ((shift < 24) && (v & mask) > 0) && ((v & (~mask)) == 0); +}]>; + + +//===----------------------------------------------------------------------===// +// Thumb-2 to cover the functionality of the ARM instruction set. +// + +/// T2I_bin_irs - Defines a set of (op reg, {so_imm|reg|so_reg}) patterns for a +// binary operation that produces a value. 
+multiclass T2I_bin_irs<string opc, PatFrag opnode> { + // shifted imm + def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), + !strconcat(opc, " $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, + Requires<[HasThumb2]>; + // register + def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), + !strconcat(opc, " $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, + Requires<[HasThumb2]>; + // shifted register + def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), + !strconcat(opc, " $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, + Requires<[HasThumb2]>; +} + +/// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the +/// instruction modifies the CPSR register. +let Defs = [CPSR] in { +multiclass T2I_bin_s_irs<string opc, PatFrag opnode> { + // shifted imm + def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), + !strconcat(opc, "s $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, + Requires<[HasThumb2]>; + + // register + def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), + !strconcat(opc, "s $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, + Requires<[HasThumb2]>; + + // shifted register + def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), + !strconcat(opc, "s $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, + Requires<[HasThumb2]>; +} +} + +/// T2I_bin_c_irs - Similar to T2I_bin_irs except it uses the 's' bit. Also the +/// instruction can optionally set the CPSR register. 
+let Uses = [CPSR] in { +multiclass T2I_bin_c_irs<string opc, PatFrag opnode> { + // shifted imm + def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs, cc_out:$s), + !strconcat(opc, "${s} $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, + Requires<[HasThumb2]>; + + // register + def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs, cc_out:$s), + !strconcat(opc, "${s} $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, + Requires<[HasThumb2]>; + + // shifted register + def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs, cc_out:$s), + !strconcat(opc, "${s} $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, + Requires<[HasThumb2]>; +} +} + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions. +// + +//===----------------------------------------------------------------------===// +// Move Instructions. +// +def tMOVi16 : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), + "movw $dst, $src", + [(set GPR:$dst, imm0_65535:$src)]>, + Requires<[HasThumb2]>; + +let isTwoAddress = 1 in +def tMOVTi16 : PseudoInst<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), + "movt $dst, $imm", + [(set GPR:$dst, (or (and GPR:$src, 0xffff), + imm16high:$imm))]>, + Requires<[HasThumb2]>; + +def : Pat<(and (or GPR:$src, imm16high:$imm1), imm16high0xffff:$imm2), + (tMOVTi16 GPR:$src, (HI16 imm16high:$imm1))>, + Requires<[HasThumb2]>; + +def : Pat<(i32 imm:$imm), + (tMOVTi16 (tMOVi16 (LO16 imm:$imm)),(HI16 imm:$imm))>, + Requires<[HasThumb2]>; + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions. 
+// +defm t2ADD : T2I_bin_irs <"add", BinOpFrag<(add node:$LHS, node:$RHS)>>; +defm t2SUB : T2I_bin_irs <"sub", BinOpFrag<(sub node:$LHS, node:$RHS)>>; + +def tADDri12 : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), + "add $dst, $lhs, $rhs", + [(set GPR:$dst, (add GPR:$lhs, imm0_4095:$rhs))]>, + Requires<[HasThumb2]>; +def tSUBri12 : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), + "sub $dst, $lhs, $rhs", + [(set GPR:$dst, (add GPR:$lhs, imm0_4095_neg:$rhs))]>, + Requires<[HasThumb2]>; + +defm t2ADDS : T2I_bin_s_irs<"add", BinOpFrag<(addc node:$LHS, node:$RHS)>>; +defm t2SUBS : T2I_bin_s_irs<"sub", BinOpFrag<(subc node:$LHS, node:$RHS)>>; + +defm t2ADC : T2I_bin_c_irs<"adc", BinOpFrag<(adde node:$LHS, node:$RHS)>>; +defm t2SBC : T2I_bin_c_irs<"sbc", BinOpFrag<(sube node:$LHS, node:$RHS)>>; + + +def tMLS : PseudoInst<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + "mls $dst, $a, $b, $c", + [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>, + Requires<[HasThumb2]>; + +def tORNrs : PseudoInst<(outs GPR:$dst), (ins GPR:$src1, t2_so_reg:$src2), + "orn $dst, $src1, $src2", + [(set GPR:$dst, (or GPR:$src1, (not t2_so_reg: $src2)))]>, + Requires<[HasThumb2]>; diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 684ecb4..59cf125 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -17,19 +17,22 @@ #include "ARMAddressingModes.h" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" +#include "llvm/DerivedTypes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" 
+#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -39,6 +42,12 @@ STATISTIC(NumSTMGened , "Number of stm instructions generated"); STATISTIC(NumFLDMGened, "Number of fldm instructions generated"); STATISTIC(NumFSTMGened, "Number of fstm instructions generated"); STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); +STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); +STATISTIC(NumSTRDFormed,"Number of strd created before allocation"); +STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm"); +STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); +STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); +STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); /// ARMAllocLoadStoreOpt - Post- register allocation pass that combines /// load / store instructions to form ldm / stm instructions. @@ -82,6 +91,8 @@ namespace { SmallVector<MachineBasicBlock::iterator, 4> &Merges); void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); + bool FixInvalidRegPairOp(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI); bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); bool MergeReturnIntoLDM(MachineBasicBlock &MBB); }; @@ -586,13 +597,19 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { static int getMemoryOpOffset(const MachineInstr *MI) { int Opcode = MI->getOpcode(); bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; + bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; unsigned NumOperands = MI->getDesc().getNumOperands(); unsigned OffField = MI->getOperand(NumOperands-3).getImm(); int Offset = isAM2 - ? 
ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4; + ? ARM_AM::getAM2Offset(OffField) + : (isAM3 ? ARM_AM::getAM3Offset(OffField) + : ARM_AM::getAM5Offset(OffField) * 4); if (isAM2) { if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub) Offset = -Offset; + } else if (isAM3) { + if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub) + Offset = -Offset; } else { if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) Offset = -Offset; @@ -600,6 +617,120 @@ static int getMemoryOpOffset(const MachineInstr *MI) { return Offset; } +static void InsertLDR_STR(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + int OffImm, bool isDef, + DebugLoc dl, unsigned NewOpc, + unsigned Reg, bool RegDeadKill, + unsigned BaseReg, bool BaseKill, + unsigned OffReg, bool OffKill, + ARMCC::CondCodes Pred, unsigned PredReg, + const TargetInstrInfo *TII) { + unsigned Offset; + if (OffImm < 0) + Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift); + else + Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift); + if (isDef) + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) + .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) + .addReg(BaseReg, getKillRegState(BaseKill)) + .addReg(OffReg, getKillRegState(OffKill)) + .addImm(Offset) + .addImm(Pred).addReg(PredReg); + else + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) + .addReg(Reg, getKillRegState(RegDeadKill)) + .addReg(BaseReg, getKillRegState(BaseKill)) + .addReg(OffReg, getKillRegState(OffKill)) + .addImm(Offset) + .addImm(Pred).addReg(PredReg); +} + +bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI) { + MachineInstr *MI = &*MBBI; + unsigned Opcode = MI->getOpcode(); + if (Opcode == ARM::LDRD || Opcode == ARM::STRD) { + unsigned EvenReg = MI->getOperand(0).getReg(); + unsigned OddReg = MI->getOperand(1).getReg(); + unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); + unsigned OddRegNum = 
TRI->getDwarfRegNum(OddReg, false); + if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum) + return false; + + bool isLd = Opcode == ARM::LDRD; + bool EvenDeadKill = isLd ? + MI->getOperand(0).isDead() : MI->getOperand(0).isKill(); + bool OddDeadKill = isLd ? + MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); + const MachineOperand &BaseOp = MI->getOperand(2); + unsigned BaseReg = BaseOp.getReg(); + bool BaseKill = BaseOp.isKill(); + const MachineOperand &OffOp = MI->getOperand(3); + unsigned OffReg = OffOp.getReg(); + bool OffKill = OffOp.isKill(); + int OffImm = getMemoryOpOffset(MI); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + + if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) { + // Ascending register numbers and no offset. It's safe to change it to a + // ldm or stm. + unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDM : ARM::STM; + if (isLd) { + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) + .addReg(BaseReg, getKillRegState(BaseKill)) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) + .addImm(Pred).addReg(PredReg) + .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) + .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); + ++NumLDRD2LDM; + } else { + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) + .addReg(BaseReg, getKillRegState(BaseKill)) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) + .addImm(Pred).addReg(PredReg) + .addReg(EvenReg, getKillRegState(EvenDeadKill)) + .addReg(OddReg, getKillRegState(OddDeadKill)); + ++NumSTRD2STM; + } + } else { + // Split into two instructions. + unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDR : ARM::STR; + DebugLoc dl = MBBI->getDebugLoc(); + // If this is a load and base register is killed, it may have been + // re-defed by the load, make sure the first load does not clobber it. 
+ if (isLd && + (BaseKill || OffKill) && + (TRI->regsOverlap(EvenReg, BaseReg) || + (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) { + assert(!TRI->regsOverlap(OddReg, BaseReg) && + (!OffReg || !TRI->regsOverlap(OddReg, OffReg))); + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill, + BaseReg, false, OffReg, false, Pred, PredReg, TII); + InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, + BaseReg, BaseKill, OffReg, OffKill, Pred, PredReg, TII); + } else { + InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, + EvenReg, EvenDeadKill, BaseReg, false, OffReg, false, + Pred, PredReg, TII); + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, + OddReg, OddDeadKill, BaseReg, BaseKill, OffReg, OffKill, + Pred, PredReg, TII); + } + if (isLd) + ++NumLDRD2LDR; + else + ++NumSTRD2STR; + } + + MBBI = prior(MBBI); + MBB.erase(MI); + } + return false; +} + /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR /// ops of the same base and incrementing offset into LDM / STM ops. 
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { @@ -617,6 +748,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { RS->enterBasicBlock(&MBB); MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); while (MBBI != E) { + if (FixInvalidRegPairOp(MBB, MBBI)) + continue; + bool Advance = false; bool TryMerge = false; bool Clobber = false; @@ -817,8 +951,10 @@ namespace { static char ID; ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {} + const TargetData *TD; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const ARMSubtarget *STI; MachineRegisterInfo *MRI; virtual bool runOnMachineFunction(MachineFunction &Fn); @@ -828,6 +964,11 @@ namespace { } private: + bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, + unsigned &NewOpc, unsigned &EvenReg, + unsigned &OddReg, unsigned &BaseReg, + unsigned &OffReg, unsigned &Offset, + unsigned &PredReg, ARMCC::CondCodes &Pred); bool RescheduleOps(MachineBasicBlock *MBB, SmallVector<MachineInstr*, 4> &Ops, unsigned Base, bool isLd, @@ -838,8 +979,10 @@ namespace { } bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { + TD = Fn.getTarget().getTargetData(); TII = Fn.getTarget().getInstrInfo(); TRI = Fn.getTarget().getRegisterInfo(); + STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); MRI = &Fn.getRegInfo(); bool Modified = false; @@ -850,15 +993,19 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { return Modified; } -static bool IsSafeToMove(bool isLd, unsigned Base, - MachineBasicBlock::iterator I, - MachineBasicBlock::iterator E, - SmallPtrSet<MachineInstr*, 4> MoveOps, - const TargetRegisterInfo *TRI) { +static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E, + SmallPtrSet<MachineInstr*, 4> &MemOps, + SmallSet<unsigned, 4> &MemRegs, + const TargetRegisterInfo *TRI) { // Are there stores / loads / calls between them? 
// FIXME: This is overly conservative. We should make use of alias information // some day. + SmallSet<unsigned, 4> AddedRegPressure; while (++I != E) { + if (MemOps.count(&*I)) + continue; const TargetInstrDesc &TID = I->getDesc(); if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects()) return false; @@ -871,15 +1018,76 @@ static bool IsSafeToMove(bool isLd, unsigned Base, // str r1, [r0] // strh r5, [r0] // str r4, [r0, #+4] - if (TID.mayStore() && !MoveOps.count(&*I)) + if (TID.mayStore()) return false; } for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { MachineOperand &MO = I->getOperand(j); - if (MO.isReg() && MO.isDef() && TRI->regsOverlap(MO.getReg(), Base)) + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (MO.isDef() && TRI->regsOverlap(Reg, Base)) return false; + if (Reg != Base && !MemRegs.count(Reg)) + AddedRegPressure.insert(Reg); } } + + // Estimate register pressure increase due to the transformation. + if (MemRegs.size() <= 4) + // Ok if we are moving small number of instructions. + return true; + return AddedRegPressure.size() <= MemRegs.size() * 2; +} + +bool +ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, + DebugLoc &dl, + unsigned &NewOpc, unsigned &EvenReg, + unsigned &OddReg, unsigned &BaseReg, + unsigned &OffReg, unsigned &Offset, + unsigned &PredReg, + ARMCC::CondCodes &Pred) { + // FIXME: FLDS / FSTS -> FLDD / FSTD + unsigned Opcode = Op0->getOpcode(); + if (Opcode == ARM::LDR) + NewOpc = ARM::LDRD; + else if (Opcode == ARM::STR) + NewOpc = ARM::STRD; + else + return 0; + + // Must sure the base address satisfies i64 ld / st alignment requirement. + if (!Op0->hasOneMemOperand() || + !Op0->memoperands_begin()->getValue() || + Op0->memoperands_begin()->isVolatile()) + return false; + + unsigned Align = Op0->memoperands_begin()->getAlignment(); + unsigned ReqAlign = STI->hasV6Ops() + ? 
TD->getPrefTypeAlignment(Type::Int64Ty) : 8; // Pre-v6 need 8-byte align + if (Align < ReqAlign) + return false; + + // Then make sure the immediate offset fits. + int OffImm = getMemoryOpOffset(Op0); + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (OffImm < 0) { + AddSub = ARM_AM::sub; + OffImm = - OffImm; + } + if (OffImm >= 256) // 8 bits + return false; + Offset = ARM_AM::getAM3Opc(AddSub, OffImm); + + EvenReg = Op0->getOperand(0).getReg(); + OddReg = Op1->getOperand(0).getReg(); + if (EvenReg == OddReg) + return false; + BaseReg = Op0->getOperand(1).getReg(); + OffReg = Op0->getOperand(2).getReg(); + Pred = getInstrPredicate(Op0, PredReg); + dl = Op0->getDebugLoc(); return true; } @@ -902,6 +1110,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, MachineInstr *FirstOp = 0; MachineInstr *LastOp = 0; int LastOffset = 0; + unsigned LastOpcode = 0; unsigned LastBytes = 0; unsigned NumMove = 0; for (int i = Ops.size() - 1; i >= 0; --i) { @@ -916,6 +1125,10 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, LastOp = Op; } + unsigned Opcode = Op->getOpcode(); + if (LastOpcode && Opcode != LastOpcode) + break; + int Offset = getMemoryOpOffset(Op); unsigned Bytes = getLSMultipleTransferSize(Op); if (LastBytes) { @@ -924,34 +1137,80 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, } LastOffset = Offset; LastBytes = Bytes; - if (++NumMove == 4) + LastOpcode = Opcode; + if (++NumMove == 8) // FIXME: Tune break; } if (NumMove <= 1) Ops.pop_back(); else { - SmallPtrSet<MachineInstr*, 4> MoveOps; - for (int i = NumMove-1; i >= 0; --i) - MoveOps.insert(Ops[i]); + SmallPtrSet<MachineInstr*, 4> MemOps; + SmallSet<unsigned, 4> MemRegs; + for (int i = NumMove-1; i >= 0; --i) { + MemOps.insert(Ops[i]); + MemRegs.insert(Ops[i]->getOperand(0).getReg()); + } // Be conservative, if the instructions are too far apart, don't // move them. We want to limit the increase of register pressure. 
- bool DoMove = (LastLoc - FirstLoc) < NumMove*4; + bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. if (DoMove) - DoMove = IsSafeToMove(isLd, Base, FirstOp, LastOp, MoveOps, TRI); + DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, + MemOps, MemRegs, TRI); if (!DoMove) { for (unsigned i = 0; i != NumMove; ++i) Ops.pop_back(); } else { // This is the new location for the loads / stores. MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; - while (InsertPos != MBB->end() && MoveOps.count(InsertPos)) + while (InsertPos != MBB->end() && MemOps.count(InsertPos)) ++InsertPos; - for (unsigned i = 0; i != NumMove; ++i) { - MachineInstr *Op = Ops.back(); + + // If we are moving a pair of loads / stores, see if it makes sense + // to try to allocate a pair of registers that can form register pairs. + MachineInstr *Op0 = Ops.back(); + MachineInstr *Op1 = Ops[Ops.size()-2]; + unsigned EvenReg = 0, OddReg = 0; + unsigned BaseReg = 0, OffReg = 0, PredReg = 0; + ARMCC::CondCodes Pred = ARMCC::AL; + unsigned NewOpc = 0; + unsigned Offset = 0; + DebugLoc dl; + if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, + EvenReg, OddReg, BaseReg, OffReg, + Offset, PredReg, Pred)) { + Ops.pop_back(); Ops.pop_back(); - MBB->splice(InsertPos, MBB, Op); + + // Form the pair instruction. + if (isLd) { + BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc)) + .addReg(EvenReg, RegState::Define) + .addReg(OddReg, RegState::Define) + .addReg(BaseReg).addReg(0).addImm(Offset) + .addImm(Pred).addReg(PredReg); + ++NumLDRDFormed; + } else { + BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc)) + .addReg(EvenReg) + .addReg(OddReg) + .addReg(BaseReg).addReg(0).addImm(Offset) + .addImm(Pred).addReg(PredReg); + ++NumSTRDFormed; + } + MBB->erase(Op0); + MBB->erase(Op1); + + // Add register allocation hints to form register pairs. 
+ MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg); + MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg); + } else { + for (unsigned i = 0; i != NumMove; ++i) { + MachineInstr *Op = Ops.back(); + Ops.pop_back(); + MBB->splice(InsertPos, MBB, Op); + } } NumLdStMoved += NumMove; @@ -1039,7 +1298,8 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { } if (StopHere) { - // Found a duplicate (a base+offset combination that's seen earlier). Backtrack. + // Found a duplicate (a base+offset combination that's seen earlier). + // Backtrack. --Loc; break; } diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index 199858f..bbc1300 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -159,7 +159,7 @@ ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &sti) : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), TII(tii), STI(sti), - FramePtr((STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11) { + FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) { } static inline @@ -194,10 +194,6 @@ void ARMRegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, .addReg(0).addImm(0).addImm(Pred).addReg(PredReg); } -const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const { - return &ARM::GPRRegClass; -} - /// isLowRegister - Returns true if the register is low register r0-r7. /// bool ARMRegisterInfo::isLowRegister(unsigned Reg) const { @@ -304,6 +300,191 @@ ARMRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const { return false; } +const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const { + return &ARM::GPRRegClass; +} + +/// getAllocationOrder - Returns the register allocation order for a specified +/// register class in the form of a pair of TargetRegisterClass iterators. 
+std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator> +ARMRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, + unsigned HintType, unsigned HintReg, + const MachineFunction &MF) const { + // Alternative register allocation orders when favoring even / odd registers + // of register pairs. + + // No FP, R9 is available. + static const unsigned GPREven1[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10, + ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, + ARM::R9, ARM::R11 + }; + static const unsigned GPROdd1[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11, + ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, + ARM::R8, ARM::R10 + }; + + // FP is R7, R9 is available. + static const unsigned GPREven2[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10, + ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, + ARM::R9, ARM::R11 + }; + static const unsigned GPROdd2[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11, + ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, + ARM::R8, ARM::R10 + }; + + // FP is R11, R9 is available. + static const unsigned GPREven3[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, + ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, + ARM::R9 + }; + static const unsigned GPROdd3[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9, + ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7, + ARM::R8 + }; + + // No FP, R9 is not available. + static const unsigned GPREven4[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10, + ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8, + ARM::R11 + }; + static const unsigned GPROdd4[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11, + ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, + ARM::R10 + }; + + // FP is R7, R9 is not available. 
+ static const unsigned GPREven5[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R10, + ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8, + ARM::R11 + }; + static const unsigned GPROdd5[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R11, + ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, + ARM::R10 + }; + + // FP is R11, R9 is not available. + static const unsigned GPREven6[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R6, + ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8 + }; + static const unsigned GPROdd6[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R7, + ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8 + }; + + + if (HintType == ARMRI::RegPairEven) { + if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0) + // It's no longer possible to fulfill this hint. Return the default + // allocation order. + return std::make_pair(RC->allocation_order_begin(MF), + RC->allocation_order_end(MF)); + + if (!STI.isTargetDarwin() && !hasFP(MF)) { + if (!STI.isR9Reserved()) + return std::make_pair(GPREven1, + GPREven1 + (sizeof(GPREven1)/sizeof(unsigned))); + else + return std::make_pair(GPREven4, + GPREven4 + (sizeof(GPREven4)/sizeof(unsigned))); + } else if (FramePtr == ARM::R7) { + if (!STI.isR9Reserved()) + return std::make_pair(GPREven2, + GPREven2 + (sizeof(GPREven2)/sizeof(unsigned))); + else + return std::make_pair(GPREven5, + GPREven5 + (sizeof(GPREven5)/sizeof(unsigned))); + } else { // FramePtr == ARM::R11 + if (!STI.isR9Reserved()) + return std::make_pair(GPREven3, + GPREven3 + (sizeof(GPREven3)/sizeof(unsigned))); + else + return std::make_pair(GPREven6, + GPREven6 + (sizeof(GPREven6)/sizeof(unsigned))); + } + } else if (HintType == ARMRI::RegPairOdd) { + if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0) + // It's no longer possible to fulfill this hint. Return the default + // allocation order. 
+ return std::make_pair(RC->allocation_order_begin(MF), + RC->allocation_order_end(MF)); + + if (!STI.isTargetDarwin() && !hasFP(MF)) { + if (!STI.isR9Reserved()) + return std::make_pair(GPROdd1, + GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned))); + else + return std::make_pair(GPROdd4, + GPROdd4 + (sizeof(GPROdd4)/sizeof(unsigned))); + } else if (FramePtr == ARM::R7) { + if (!STI.isR9Reserved()) + return std::make_pair(GPROdd2, + GPROdd2 + (sizeof(GPROdd2)/sizeof(unsigned))); + else + return std::make_pair(GPROdd5, + GPROdd5 + (sizeof(GPROdd5)/sizeof(unsigned))); + } else { // FramePtr == ARM::R11 + if (!STI.isR9Reserved()) + return std::make_pair(GPROdd3, + GPROdd3 + (sizeof(GPROdd3)/sizeof(unsigned))); + else + return std::make_pair(GPROdd6, + GPROdd6 + (sizeof(GPROdd6)/sizeof(unsigned))); + } + } + return std::make_pair(RC->allocation_order_begin(MF), + RC->allocation_order_end(MF)); +} + +/// ResolveRegAllocHint - Resolves the specified register allocation hint +/// to a physical register. Returns the physical register if it is successful. +unsigned +ARMRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg, + const MachineFunction &MF) const { + if (Reg == 0 || !isPhysicalRegister(Reg)) + return 0; + if (Type == 0) + return Reg; + else if (Type == (unsigned)ARMRI::RegPairOdd) + // Odd register. + return getRegisterPairOdd(Reg, MF); + else if (Type == (unsigned)ARMRI::RegPairEven) + // Even register. + return getRegisterPairEven(Reg, MF); + return 0; +} + +void +ARMRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, + MachineFunction &MF) const { + MachineRegisterInfo *MRI = &MF.getRegInfo(); + std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg); + if ((Hint.first == (unsigned)ARMRI::RegPairOdd || + Hint.first == (unsigned)ARMRI::RegPairEven) && + Hint.second && TargetRegisterInfo::isVirtualRegister(Hint.second)) { + // If 'Reg' is one of the even / odd register pair and it's now changed + // (e.g. 
coalesced) into a different register. The other register of the + // pair allocation hint must be updated to reflect the relationship + // change. + unsigned OtherReg = Hint.second; + Hint = MRI->getRegAllocationHint(OtherReg); + if (Hint.second == Reg) + // Make sure the pair has not already divorced. + MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg); + } +} + bool ARMRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -1506,9 +1687,8 @@ unsigned ARMRegisterInfo::getRARegister() const { unsigned ARMRegisterInfo::getFrameRegister(MachineFunction &MF) const { if (STI.isTargetDarwin() || hasFP(MF)) - return (STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11; - else - return ARM::SP; + return FramePtr; + return ARM::SP; } unsigned ARMRegisterInfo::getEHExceptionRegister() const { @@ -1525,4 +1705,152 @@ int ARMRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); } +unsigned ARMRegisterInfo::getRegisterPairEven(unsigned Reg, + const MachineFunction &MF) const { + switch (Reg) { + default: break; + // Return 0 if either register of the pair is a special register. + // So no R12, etc. + case ARM::R1: + return ARM::R0; + case ARM::R3: + // FIXME! + return STI.isThumb() ? 0 : ARM::R2; + case ARM::R5: + return ARM::R4; + case ARM::R7: + return isReservedReg(MF, ARM::R7) ? 0 : ARM::R6; + case ARM::R9: + return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8; + case ARM::R11: + return isReservedReg(MF, ARM::R11) ? 
0 : ARM::R10; + + case ARM::S1: + return ARM::S0; + case ARM::S3: + return ARM::S2; + case ARM::S5: + return ARM::S4; + case ARM::S7: + return ARM::S6; + case ARM::S9: + return ARM::S8; + case ARM::S11: + return ARM::S10; + case ARM::S13: + return ARM::S12; + case ARM::S15: + return ARM::S14; + case ARM::S17: + return ARM::S16; + case ARM::S19: + return ARM::S18; + case ARM::S21: + return ARM::S20; + case ARM::S23: + return ARM::S22; + case ARM::S25: + return ARM::S24; + case ARM::S27: + return ARM::S26; + case ARM::S29: + return ARM::S28; + case ARM::S31: + return ARM::S30; + + case ARM::D1: + return ARM::D0; + case ARM::D3: + return ARM::D2; + case ARM::D5: + return ARM::D4; + case ARM::D7: + return ARM::D6; + case ARM::D9: + return ARM::D8; + case ARM::D11: + return ARM::D10; + case ARM::D13: + return ARM::D12; + case ARM::D15: + return ARM::D14; + } + + return 0; +} + +unsigned ARMRegisterInfo::getRegisterPairOdd(unsigned Reg, + const MachineFunction &MF) const { + switch (Reg) { + default: break; + // Return 0 if either register of the pair is a special register. + // So no R12, etc. + case ARM::R0: + return ARM::R1; + case ARM::R2: + // FIXME! + return STI.isThumb() ? 0 : ARM::R3; + case ARM::R4: + return ARM::R5; + case ARM::R6: + return isReservedReg(MF, ARM::R7) ? 0 : ARM::R7; + case ARM::R8: + return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9; + case ARM::R10: + return isReservedReg(MF, ARM::R11) ? 
0 : ARM::R11; + + case ARM::S0: + return ARM::S1; + case ARM::S2: + return ARM::S3; + case ARM::S4: + return ARM::S5; + case ARM::S6: + return ARM::S7; + case ARM::S8: + return ARM::S9; + case ARM::S10: + return ARM::S11; + case ARM::S12: + return ARM::S13; + case ARM::S14: + return ARM::S15; + case ARM::S16: + return ARM::S17; + case ARM::S18: + return ARM::S19; + case ARM::S20: + return ARM::S21; + case ARM::S22: + return ARM::S23; + case ARM::S24: + return ARM::S25; + case ARM::S26: + return ARM::S27; + case ARM::S28: + return ARM::S29; + case ARM::S30: + return ARM::S31; + + case ARM::D0: + return ARM::D1; + case ARM::D2: + return ARM::D3; + case ARM::D4: + return ARM::D5; + case ARM::D6: + return ARM::D7; + case ARM::D8: + return ARM::D9; + case ARM::D10: + return ARM::D11; + case ARM::D12: + return ARM::D13; + case ARM::D14: + return ARM::D15; + } + + return 0; +} + #include "ARMGenRegisterInfo.inc" diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index e1d9efb..e8f4fd8 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -22,12 +22,17 @@ namespace llvm { class TargetInstrInfo; class Type; +/// Register allocation hints. +namespace ARMRI { + enum { + RegPairOdd = 1, + RegPairEven = 2 + }; +} + struct ARMRegisterInfo : public ARMGenRegisterInfo { const TargetInstrInfo &TII; const ARMSubtarget &STI; -private: - /// FramePtr - ARM physical register used as frame ptr. - unsigned FramePtr; public: ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); @@ -49,10 +54,6 @@ public: /// if the register is a single precision VFP register. static unsigned getRegisterNumbering(unsigned RegEnum, bool &isSPVFP); - /// getPointerRegClass - Return the register class to use to hold pointers. - /// This is used for addressing modes. - const TargetRegisterClass *getPointerRegClass() const; - /// Code Generation virtual methods... 
const TargetRegisterClass * getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const; @@ -65,6 +66,19 @@ public: bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; + const TargetRegisterClass *getPointerRegClass() const; + + std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator> + getAllocationOrder(const TargetRegisterClass *RC, + unsigned HintType, unsigned HintReg, + const MachineFunction &MF) const; + + unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg, + const MachineFunction &MF) const; + + void UpdateRegAllocHint(unsigned Reg, unsigned NewReg, + MachineFunction &MF) const; + bool requiresRegisterScavenging(const MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const; @@ -95,6 +109,15 @@ public: int getDwarfRegNum(unsigned RegNum, bool isEH) const; bool isLowRegister(unsigned Reg) const; + +private: + /// FramePtr - ARM physical register used as frame ptr. + unsigned FramePtr; + + unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const; + + unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const; + }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index ebe7d58..d864079 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -134,7 +134,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, GPRClass::allocation_order_begin(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - if (Subtarget.useThumbBacktraces()) { + if (Subtarget.isTargetDarwin()) { if (Subtarget.isR9Reserved()) return ARM_GPR_AO_4; else @@ -154,7 +154,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); GPRClass::iterator I; - if (Subtarget.useThumbBacktraces()) { + if (Subtarget.isTargetDarwin()) { 
if (Subtarget.isR9Reserved()) { I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned)); } else { diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td new file mode 100644 index 0000000..75fa707 --- /dev/null +++ b/lib/Target/ARM/ARMSchedule.td @@ -0,0 +1,35 @@ +//===- ARMSchedule.td - ARM Scheduling Definitions ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Functional units across ARM processors +// +def FU_iALU : FuncUnit; // Integer alu unit +def FU_iLdSt : FuncUnit; // Integer load / store unit +def FU_FpALU : FuncUnit; // FP alu unit +def FU_FpLdSt : FuncUnit; // FP load / store unit +def FU_Br : FuncUnit; // Branch unit + +//===----------------------------------------------------------------------===// +// Instruction Itinerary classes used for ARM +// +def IIC_iALU : InstrItinClass; +def IIC_iLoad : InstrItinClass; +def IIC_iStore : InstrItinClass; +def IIC_fpALU : InstrItinClass; +def IIC_fpLoad : InstrItinClass; +def IIC_fpStore : InstrItinClass; +def IIC_Br : InstrItinClass; + +//===----------------------------------------------------------------------===// +// Processor instruction itineraries. + +def GenericItineraries : ProcessorItineraries<[]>; + +include "ARMScheduleV6.td" diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td new file mode 100644 index 0000000..596a57f --- /dev/null +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -0,0 +1,22 @@ +//===- ARMSchedule.td - ARM v6 Scheduling Definitions ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the ARM v6 processors. +// +//===----------------------------------------------------------------------===// + +def V6Itineraries : ProcessorItineraries<[ + InstrItinData<IIC_iALU , [InstrStage<1, [FU_iALU]>]>, + InstrItinData<IIC_iLoad , [InstrStage<2, [FU_iLdSt]>]>, + InstrItinData<IIC_iStore , [InstrStage<1, [FU_iLdSt]>]>, + InstrItinData<IIC_fpALU , [InstrStage<6, [FU_FpALU]>]>, + InstrItinData<IIC_fpLoad , [InstrStage<2, [FU_FpLdSt]>]>, + InstrItinData<IIC_fpStore , [InstrStage<1, [FU_FpLdSt]>]>, + InstrItinData<IIC_Br , [InstrStage<3, [FU_Br]>]> +]>; diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index a978380..7ac7b49 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -24,7 +24,6 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, , ARMFPUType(None) , IsThumb(isThumb) , ThumbMode(Thumb1) - , UseThumbBacktraces(false) , IsR9Reserved(false) , stackAlignment(4) , CPUString("generic") @@ -83,8 +82,6 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, if (isAAPCS_ABI()) stackAlignment = 8; - if (isTargetDarwin()) { - UseThumbBacktraces = true; + if (isTargetDarwin()) IsR9Reserved = true; - } } diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 0704055..c3cc7ff 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -14,6 +14,7 @@ #ifndef ARMSUBTARGET_H #define ARMSUBTARGET_H +#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetSubtarget.h" #include <string> @@ -48,9 +49,6 @@ protected: /// ThumbMode - Indicates supported Thumb version. ThumbTypeEnum ThumbMode; - /// UseThumbBacktraces - True if we use thumb style backtraces. - bool UseThumbBacktraces; - /// IsR9Reserved - True if R9 is a not available as general purpose register. 
bool IsR9Reserved; @@ -61,6 +59,9 @@ protected: /// CPUString - String name of used CPU. std::string CPUString; + /// Selected instruction itineraries (one entry per itinerary class.) + InstrItineraryData InstrItins; + public: enum { isELF, isDarwin @@ -106,14 +107,17 @@ protected: bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; } bool isThumb() const { return IsThumb; } - bool isThumb1() const { return IsThumb && (ThumbMode == Thumb1); } - bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); } + bool isThumb1Only() const { return IsThumb && (ThumbMode == Thumb1); } + bool hasThumb2() const { return IsThumb && (ThumbMode >= Thumb2); } - bool useThumbBacktraces() const { return UseThumbBacktraces; } bool isR9Reserved() const { return IsR9Reserved; } const std::string & getCPUString() const { return CPUString; } + /// getInstrItins - Return the instruction itineraies based on subtarget + /// selection. + const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } + /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. 
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.cpp b/lib/Target/ARM/ARMTargetAsmInfo.cpp index 4107dcc..42b8eae 100644 --- a/lib/Target/ARM/ARMTargetAsmInfo.cpp +++ b/lib/Target/ARM/ARMTargetAsmInfo.cpp @@ -17,80 +17,42 @@ #include <cctype> using namespace llvm; - const char *const llvm::arm_asm_table[] = { - "{r0}", "r0", - "{r1}", "r1", - "{r2}", "r2", - "{r3}", "r3", - "{r4}", "r4", - "{r5}", "r5", - "{r6}", "r6", - "{r7}", "r7", - "{r8}", "r8", - "{r9}", "r9", - "{r10}", "r10", - "{r11}", "r11", - "{r12}", "r12", - "{r13}", "r13", - "{r14}", "r14", - "{lr}", "lr", - "{sp}", "sp", - "{ip}", "ip", - "{fp}", "fp", - "{sl}", "sl", - "{memory}", "memory", - "{cc}", "cc", - 0,0}; + "{r0}", "r0", + "{r1}", "r1", + "{r2}", "r2", + "{r3}", "r3", + "{r4}", "r4", + "{r5}", "r5", + "{r6}", "r6", + "{r7}", "r7", + "{r8}", "r8", + "{r9}", "r9", + "{r10}", "r10", + "{r11}", "r11", + "{r12}", "r12", + "{r13}", "r13", + "{r14}", "r14", + "{lr}", "lr", + "{sp}", "sp", + "{ip}", "ip", + "{fp}", "fp", + "{sl}", "sl", + "{memory}", "memory", + "{cc}", "cc", + 0,0 +}; ARMDarwinTargetAsmInfo::ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM): ARMTargetAsmInfo<DarwinTargetAsmInfo>(TM) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); - GlobalPrefix = "_"; - PrivateGlobalPrefix = "L"; - LessPrivateGlobalPrefix = "l"; - StringConstantPrefix = "\1LC"; - BSSSection = 0; // no BSS section ZeroDirective = "\t.space\t"; ZeroFillDirective = "\t.zerofill\t"; // Uses .zerofill SetDirective = "\t.set\t"; - WeakRefDirective = "\t.weak_reference\t"; - WeakDefDirective = "\t.weak_definition "; - HiddenDirective = "\t.private_extern\t"; ProtectedDirective = NULL; - JumpTableDataSection = ".const"; - CStringSection = "\t.cstring"; HasDotTypeDotSizeDirective = false; - HasSingleParameterDotFile = false; - NeedsIndirectEncoding = true; - if (TM.getRelocationModel() == Reloc::Static) { - StaticCtorsSection = ".constructor"; - StaticDtorsSection = ".destructor"; - } else { - StaticCtorsSection = 
".mod_init_func"; - StaticDtorsSection = ".mod_term_func"; - } - - // In non-PIC modes, emit a special label before jump tables so that the - // linker can perform more accurate dead code stripping. - if (TM.getRelocationModel() != Reloc::PIC_) { - // Emit a local label that is preserved until the linker runs. - JumpTableSpecialLabelPrefix = "l"; - } - - NeedsSet = true; - DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug"; - DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug"; - DwarfLineSection = ".section __DWARF,__debug_line,regular,debug"; - DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug"; - DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug"; - DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug"; - DwarfStrSection = ".section __DWARF,__debug_str,regular,debug"; - DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug"; - DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug"; - DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug"; - DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug"; + SupportsDebugInformation = true; } ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM): @@ -115,7 +77,7 @@ ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM): DwarfLocSection = "\t.section\t.debug_loc,\"\",%progbits"; DwarfARangesSection = "\t.section\t.debug_aranges,\"\",%progbits"; DwarfRangesSection = "\t.section\t.debug_ranges,\"\",%progbits"; - DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",%progbits"; + DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",%progbits"; if (Subtarget->isAAPCS_ABI()) { StaticCtorsSection = "\t.section .init_array,\"aw\",%init_array"; @@ -124,6 +86,7 @@ ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM): StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits"; StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits"; } + 
SupportsDebugInformation = true; } /// Count the number of comma-separated arguments. diff --git a/lib/Target/ARM/ARMTargetAsmInfo.h b/lib/Target/ARM/ARMTargetAsmInfo.h index 9e6f856..683692f 100644 --- a/lib/Target/ARM/ARMTargetAsmInfo.h +++ b/lib/Target/ARM/ARMTargetAsmInfo.h @@ -26,8 +26,7 @@ namespace llvm { template <class BaseTAI> struct ARMTargetAsmInfo : public BaseTAI { - explicit ARMTargetAsmInfo(const ARMTargetMachine &TM): - BaseTAI(TM) { + explicit ARMTargetAsmInfo(const ARMTargetMachine &TM) : BaseTAI(TM) { BaseTAI::AsmTransCBE = arm_asm_table; BaseTAI::AlignmentIsInBytes = false; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 7033907..8006b9b 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -23,9 +23,6 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; -static cl::opt<bool> -EnablePreLdStOpti("arm-pre-alloc-loadstore-opti", cl::Hidden, - cl::desc("Enable pre-regalloc load store optimization pass")); static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden, cl::desc("Disable load store optimization pass")); static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden, @@ -42,6 +39,11 @@ int ARMTargetMachineModule = 0; static RegisterTarget<ARMTargetMachine> X("arm", "ARM"); static RegisterTarget<ThumbTargetMachine> Y("thumb", "Thumb"); +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeARMTarget() { } +} + // No assembler printer by default ARMTargetMachine::AsmPrinterCtorFn ARMTargetMachine::AsmPrinterCtor = 0; @@ -97,7 +99,8 @@ ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS, InstrInfo(Subtarget), FrameInfo(Subtarget), JITInfo(), - TLInfo(*this) { + TLInfo(*this), + InstrItins(Subtarget.getInstrItineraryData()) { DefRelocModel = getRelocationModel(); } @@ -149,8 +152,6 @@ bool 
ARMTargetMachine::addInstSelector(PassManagerBase &PM, bool ARMTargetMachine::addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - if (!EnablePreLdStOpti) - return false; // FIXME: temporarily disabling load / store optimization pass for Thumb mode. if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb()) PM.add(createARMLoadStoreOptimizationPass(true)); diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 7192c1b..c4c8e6c 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -28,13 +28,14 @@ namespace llvm { class Module; class ARMTargetMachine : public LLVMTargetMachine { - ARMSubtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment - ARMInstrInfo InstrInfo; - ARMFrameInfo FrameInfo; - ARMJITInfo JITInfo; - ARMTargetLowering TLInfo; - Reloc::Model DefRelocModel; // Reloc model before it's overridden. + ARMSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + ARMInstrInfo InstrInfo; + ARMFrameInfo FrameInfo; + ARMJITInfo JITInfo; + ARMTargetLowering TLInfo; + InstrItineraryData InstrItins; + Reloc::Model DefRelocModel; // Reloc model before it's overridden. 
protected: // To avoid having target depend on the asmprinter stuff libraries, asmprinter @@ -59,6 +60,9 @@ public: virtual ARMTargetLowering *getTargetLowering() const { return const_cast<ARMTargetLowering*>(&TLInfo); } + virtual const InstrItineraryData getInstrItineraryData() const { + return InstrItins; + } static void registerAsmPrinter(AsmPrinterCtorFn F) { AsmPrinterCtor = F; diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index d908cf4..948a100 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -45,7 +45,6 @@ STATISTIC(EmittedInsts, "Number of machine instrs printed"); namespace { class VISIBILITY_HIDDEN ARMAsmPrinter : public AsmPrinter { DwarfWriter *DW; - MachineModuleInfo *MMI; /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when printing asm code for different targets. @@ -84,7 +83,7 @@ namespace { explicit ARMAsmPrinter(raw_ostream &O, TargetMachine &TM, const TargetAsmInfo *T, CodeGenOpt::Level OL, bool V) - : AsmPrinter(O, TM, T, OL, V), DW(0), MMI(NULL), AFI(NULL), MCP(NULL), + : AsmPrinter(O, TM, T, OL, V), DW(0), AFI(NULL), MCP(NULL), InCPMode(false) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); } @@ -97,6 +96,7 @@ namespace { const char *Modifier = 0); void printSOImmOperand(const MachineInstr *MI, int opNum); void printSOImm2PartOperand(const MachineInstr *MI, int opNum); + void printSOOperand(const MachineInstr *MI, int OpNum); void printSORegOperand(const MachineInstr *MI, int opNum); void printAddrMode2Operand(const MachineInstr *MI, int OpNo); void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo); @@ -396,6 +396,28 @@ void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum) { printSOImm(O, ARM_AM::getSOImmVal(V2), VerboseAsm, TAI); } +// Constant shifts so_reg is a 3-operand unit corresponding to register forms of +// the A5.1 "Addressing 
Mode 1 - Data-processing operands" forms. This +// includes: +// REG 0 - e.g. R5 +// REG IMM, SH_OPC - e.g. R5, LSL #3 +void ARMAsmPrinter::printSOOperand(const MachineInstr *MI, int OpNum) { + const MachineOperand &MO1 = MI->getOperand(OpNum); + const MachineOperand &MO2 = MI->getOperand(OpNum+1); + + unsigned Reg = MO1.getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + O << TM.getRegisterInfo()->getAsmName(Reg); + + // Print the shift opc. + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm())) + << " "; + + assert(MO2.isImm() && "Not a valid t2_so_reg value!"); + O << "#" << ARM_AM::getSORegOffset(MO2.getImm()); +} + // so_reg is a 4-operand unit corresponding to register forms of the A5.1 // "Addressing Mode 1 - Data-processing operands" forms. This includes: // REG 0 0 - e.g. R5 @@ -805,17 +827,11 @@ void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) { bool ARMAsmPrinter::doInitialization(Module &M) { bool Result = AsmPrinter::doInitialization(M); - - // Emit initial debug information. - MMI = getAnalysisIfAvailable<MachineModuleInfo>(); - assert(MMI); DW = getAnalysisIfAvailable<DwarfWriter>(); - assert(DW && "Dwarf Writer is not available"); - DW->BeginModule(&M, MMI, O, this, TAI); - // Darwin wants symbols to be quoted if they have complex names. - if (Subtarget->isTargetDarwin()) - Mang->setUseQuotes(true); + // Thumb-2 instructions are supported only in unified assembler syntax mode. + if (Subtarget->hasThumb2()) + O << "\t.syntax unified\n"; // Emit ARM Build Attributes if (Subtarget->isTargetELF()) { @@ -1115,3 +1131,9 @@ namespace { } } Registrator; } + +// Force static initialization when called from +// llvm/InitializeAllAsmPrinters.h +namespace llvm { + void InitializeARMAsmPrinter() { } +} |