diff options
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r-- | lib/Target/ARM/ARM.td | 18 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelDAGToDAG.cpp | 3 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrInfo.td | 4 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrThumb.td | 25 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleV6.td | 188 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleV7.td | 8 | ||||
-rw-r--r-- | lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 13 | ||||
-rw-r--r-- | lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp | 5 | ||||
-rw-r--r-- | lib/Target/ARM/AsmPrinter/ARMInstPrinter.h | 3 | ||||
-rw-r--r-- | lib/Target/ARM/Thumb2SizeReduction.cpp | 34 |
10 files changed, 262 insertions, 39 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index cb9bd6a..7033861 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -89,16 +89,18 @@ def : ProcNoItin<"xscale", [ArchV5TE]>; def : ProcNoItin<"iwmmxt", [ArchV5TE]>; // V6 Processors. -def : ProcNoItin<"arm1136j-s", [ArchV6]>; -def : ProcNoItin<"arm1136jf-s", [ArchV6, FeatureVFP2]>; -def : ProcNoItin<"arm1176jz-s", [ArchV6]>; -def : ProcNoItin<"arm1176jzf-s", [ArchV6, FeatureVFP2]>; -def : ProcNoItin<"mpcorenovfp", [ArchV6]>; -def : ProcNoItin<"mpcore", [ArchV6, FeatureVFP2]>; +def : Processor<"arm1136j-s", ARMV6Itineraries, [ArchV6]>; +def : Processor<"arm1136jf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>; +def : Processor<"arm1176jz-s", ARMV6Itineraries, [ArchV6]>; +def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>; +def : Processor<"mpcorenovfp", ARMV6Itineraries, [ArchV6]>; +def : Processor<"mpcore", ARMV6Itineraries, [ArchV6, FeatureVFP2]>; // V6T2 Processors. -def : ProcNoItin<"arm1156t2-s", [ArchV6T2, FeatureThumb2]>; -def : ProcNoItin<"arm1156t2f-s", [ArchV6T2, FeatureThumb2, FeatureVFP2]>; +def : Processor<"arm1156t2-s", ARMV6Itineraries, + [ArchV6T2, FeatureThumb2]>; +def : Processor<"arm1156t2f-s", ARMV6Itineraries, + [ArchV6T2, FeatureThumb2, FeatureVFP2]>; // V7 Processors. def : Processor<"cortex-a8", CortexA8Itineraries, diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 9be7454..696a8e1 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1545,7 +1545,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { InFlag = SDValue(ResNode, 1); ReplaceUses(SDValue(Op.getNode(), 1), InFlag); } - ReplaceUses(SDValue(Op.getNode(), 0), SDValue(Chain.getNode(), Chain.getResNo())); + ReplaceUses(SDValue(Op.getNode(), 0), + SDValue(Chain.getNode(), Chain.getResNo())); return NULL; } case ARMISD::CMOV: { diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 3fe634e..79bde29 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -608,11 +608,11 @@ def PICSTR : AXI2stw<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), [(store GPR:$src, addrmodepc:$addr)]>; def PICSTRH : AXI3sth<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), - Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}h\t$src, $addr", + Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstrh${p}\t$src, $addr", [(truncstorei16 GPR:$src, addrmodepc:$addr)]>; def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), - Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}b\t$src, $addr", + Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstrb${p}\t$src, $addr", [(truncstorei8 GPR:$src, addrmodepc:$addr)]>; } } // isNotDuplicable = 1 diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 2796364..d1831d1 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -66,6 +66,11 @@ def thumb_immshifted_shamt : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(V, MVT::i32); }]>; +// Scaled 4 immediate. +def t_imm_s4 : Operand<i32> { + let PrintMethod = "printThumbS4ImmOperand"; +} + // Define Thumb specific addressing modes. // t_addrmode_rr := reg + reg @@ -134,20 +139,20 @@ def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>; // PC relative add. -def tADDrPCi : T1I<(outs tGPR:$dst), (ins i32imm:$rhs), IIC_iALUi, - "add\t$dst, pc, $rhs * 4", []>; +def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi, + "add\t$dst, pc, $rhs", []>; // ADD rd, sp, #imm8 -def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), IIC_iALUi, - "add\t$dst, $sp, $rhs * 4", []>; +def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi, + "add\t$dst, $sp, $rhs", []>; // ADD sp, sp, #imm7 -def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALUi, - "add\t$dst, $rhs * 4", []>; +def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi, + "add\t$dst, $rhs", []>; // SUB sp, sp, #imm7 -def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALUi, - "sub\t$dst, $rhs * 4", []>; +def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi, + "sub\t$dst, $rhs", []>; // ADD rm, sp def tADDrSP : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, @@ -159,8 +164,8 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, // Pseudo instruction that will expand into a tSUBspi + a copy. let usesCustomInserter = 1 in { // Expanded after instruction selection. -def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), - NoItinerary, "@ sub\t$dst, $rhs * 4", []>; +def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), + NoItinerary, "@ sub\t$dst, $rhs", []>; def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), NoItinerary, "@ add\t$dst, $rhs", []>; diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index 1ace718..0fef466 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -11,4 +11,190 @@ // //===----------------------------------------------------------------------===// -// TODO: Add model for an ARM11 +// Model based on ARM1176 +// +// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual". +// +def ARMV6Itineraries : ProcessorItineraries<[ + // + // No operand cycles + InstrItinData<IIC_iALUx , [InstrStage<1, [FU_Pipe0]>]>, + // + // Binary Instructions that produce a result + InstrItinData<IIC_iALUi , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + InstrItinData<IIC_iALUr , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>, + InstrItinData<IIC_iALUsi , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>, + InstrItinData<IIC_iALUsr , [InstrStage<2, [FU_Pipe0]>], [3, 3, 2, 1]>, + // + // Unary Instructions that produce a result + InstrItinData<IIC_iUNAr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + InstrItinData<IIC_iUNAsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, + InstrItinData<IIC_iUNAsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>, + // + // Compare instructions + InstrItinData<IIC_iCMPi , [InstrStage<1, [FU_Pipe0]>], [2]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + InstrItinData<IIC_iCMPsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, + InstrItinData<IIC_iCMPsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>, + // + // Move instructions, unconditional + InstrItinData<IIC_iMOVi , [InstrStage<1, [FU_Pipe0]>], [2]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + InstrItinData<IIC_iMOVsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, + InstrItinData<IIC_iMOVsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>, + // + // Move instructions, conditional + InstrItinData<IIC_iCMOVi , [InstrStage<1, [FU_Pipe0]>], [3]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [FU_Pipe0]>], [3, 2]>, + InstrItinData<IIC_iCMOVsi , [InstrStage<1, [FU_Pipe0]>], [3, 1]>, + InstrItinData<IIC_iCMOVsr , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>, + + // Integer multiply pipeline + // + InstrItinData<IIC_iMUL16 , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>, + InstrItinData<IIC_iMAC16 , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1, 2]>, + InstrItinData<IIC_iMUL32 , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1]>, + InstrItinData<IIC_iMAC32 , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1, 2]>, + InstrItinData<IIC_iMUL64 , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1]>, + InstrItinData<IIC_iMAC64 , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1, 2]>, + + // Integer load pipeline + // + // Immediate offset + InstrItinData<IIC_iLoadi , [InstrStage<1, [FU_Pipe0]>], [4, 1]>, + // + // Register offset + InstrItinData<IIC_iLoadr , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>, + // + // Scaled register offset, issues over 2 cycles + InstrItinData<IIC_iLoadsi , [InstrStage<2, [FU_Pipe0]>], [5, 2, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iLoadiu , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>, + // + // Register offset with update + InstrItinData<IIC_iLoadru , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1, 1]>, + // + // Scaled register offset with update, issues over 2 cycles + InstrItinData<IIC_iLoadsiu , [InstrStage<2, [FU_Pipe0]>], [5, 2, 2, 1]>, + + // + // Load multiple + InstrItinData<IIC_iLoadm , [InstrStage<3, [FU_Pipe0]>]>, + + // Integer store pipeline + // + // Immediate offset + InstrItinData<IIC_iStorei , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, + // + // Register offset + InstrItinData<IIC_iStorer , [InstrStage<1, [FU_Pipe0]>], [2, 1, 1]>, + + // + // Scaled register offset, issues over 2 cycles + InstrItinData<IIC_iStoresi , [InstrStage<2, [FU_Pipe0]>], [2, 2, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>, + // + // Register offset with update + InstrItinData<IIC_iStoreru , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1, 1]>, + // + // Scaled register offset with update, issues over 2 cycles + InstrItinData<IIC_iStoresiu, [InstrStage<2, [FU_Pipe0]>], [2, 2, 2, 1]>, + // + // Store multiple + InstrItinData<IIC_iStorem , [InstrStage<3, [FU_Pipe0]>]>, + + // Branch + // + // no delay slots, so the latency of a branch is unimportant + InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>, + + // VFP + // Issue through integer pipeline, and execute in NEON unit. We assume + // RunFast mode so that NFP pipeline is used for single-precision when + // possible. + // + // FP Special Register to Integer Register File Move + InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0]>], [3]>, + // + // Single-precision FP Unary + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + // + // Double-precision FP Unary + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + // + // Single-precision FP Compare + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + // + // Double-precision FP Compare + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + // + // Single to Double FP Convert + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + // + // Double to Single FP Convert + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + // + // Single-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + // + // Double-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + // + // Integer to Single-Precision FP Convert + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + // + // Integer to Double-Precision FP Convert + InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + // + // Single-precision FP ALU + InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>, + // + // Double-precision FP ALU + InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>, + // + // Single-precision FP Multiply + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>, + // + // Double-precision FP Multiply + InstrItinData<IIC_fpMUL64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2]>, + // + // Single-precision FP MAC + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2, 2]>, + // + // Double-precision FP MAC + InstrItinData<IIC_fpMAC64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2, 2]>, + // + // Single-precision FP DIV + InstrItinData<IIC_fpDIV32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>, + // + // Double-precision FP DIV + InstrItinData<IIC_fpDIV64 , [InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>, + // + // Single-precision FP SQRT + InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>, + // + // Double-precision FP SQRT + InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>, + // + // Single-precision FP Load + InstrItinData<IIC_fpLoad32 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>, + // + // Double-precision FP Load + InstrItinData<IIC_fpLoad64 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>, + // + // FP Load Multiple + InstrItinData<IIC_fpLoadm , [InstrStage<3, [FU_Pipe0]>]>, + // + // Single-precision FP Store + InstrItinData<IIC_fpStore32 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>, + // + // Double-precision FP Store + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore64 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>, + // + // FP Store Multiple + InstrItinData<IIC_fpStorem , [InstrStage<3, [FU_Pipe0]>]> +]>; diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td index e565813..427645c 100644 --- a/lib/Target/ARM/ARMScheduleV7.td +++ b/lib/Target/ARM/ARMScheduleV7.td @@ -184,7 +184,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // // Single-precision FP Compare InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, + InstrStage<1, [FU_NPipe]>], [1, 1]>, // // Double-precision FP Compare InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, @@ -221,7 +221,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // // Single-precision FP ALU InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, + InstrStage<1, [FU_NPipe]>], [7, 1, 1]>, // // Double-precision FP ALU InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, @@ -230,7 +230,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // // Single-precision FP Multiply InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, + InstrStage<1, [FU_NPipe]>], [7, 1, 1]>, // // Double-precision FP Multiply InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, @@ -239,7 +239,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // // Single-precision FP MAC InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, + InstrStage<1, [FU_NPipe]>], [7, 2, 1, 1]>, // // Double-precision FP MAC InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 0352503..dd4a240 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -110,6 +110,7 @@ namespace { const char *Modifier = 0); void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum); + void printThumbS4ImmOperand(const MachineInstr *MI, int OpNum); void printThumbITMask(const MachineInstr *MI, int OpNum); void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNum); void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNum, @@ -674,6 +675,10 @@ ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op) { //===--------------------------------------------------------------------===// +void ARMAsmPrinter::printThumbS4ImmOperand(const MachineInstr *MI, int Op) { + O << "#" << MI->getOperand(Op).getImm() * 4; +} + void ARMAsmPrinter::printThumbITMask(const MachineInstr *MI, int Op) { // (3 - the number of trailing zeros) is the number of then / else. @@ -713,7 +718,7 @@ ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op, if (MO3.getReg()) O << ", " << getRegisterName(MO3.getReg()); else if (unsigned ImmOffs = MO2.getImm()) - O << ", #" << ImmOffs * Scale; + O << ", #+" << ImmOffs * Scale; O << "]"; } @@ -735,7 +740,7 @@ void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) { const MachineOperand &MO2 = MI->getOperand(Op+1); O << "[" << getRegisterName(MO1.getReg()); if (unsigned ImmOffs = MO2.getImm()) - O << ", #" << ImmOffs << " * 4"; + O << ", #+" << ImmOffs*4; O << "]"; } @@ -801,9 +806,9 @@ void ARMAsmPrinter::printT2AddrModeImm8s4Operand(const MachineInstr *MI, int32_t OffImm = (int32_t)MO2.getImm() / 4; // Don't print +0. if (OffImm < 0) - O << ", #-" << -OffImm << " * 4"; + O << ", #-" << -OffImm * 4; else if (OffImm > 0) - O << ", #+" << OffImm << " * 4"; + O << ", #+" << OffImm * 4; O << "]"; } diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index 0047925..9fc57e0 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -351,3 +351,8 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum) { // FIXME: remove this. abort(); } + +void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum) { + // FIXME: remove this. + abort(); +} diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index 9e7f8d5..23a7f05 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -52,7 +52,8 @@ public: const char *Modifier = 0); void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum); - + + void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum); void printThumbITMask(const MCInst *MI, unsigned OpNum) {} void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum) {} void printThumbAddrModeRI5Operand(const MCInst *MI, unsigned OpNum, diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 9ce30aa..ad1739c 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -105,7 +105,7 @@ namespace { // FIXME: Clean this up after splitting each Thumb load / store opcode // into multiple ones. - { ARM::t2LDRi12,ARM::tLDR, 0, 5, 0, 1, 0, 0,0, 1 }, + { ARM::t2LDRi12,ARM::tLDR, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 1 }, { ARM::t2LDRs, ARM::tLDR, 0, 0, 0, 1, 0, 0,0, 1 }, { ARM::t2LDRBi12,ARM::tLDRB, 0, 5, 0, 1, 0, 0,0, 1 }, { ARM::t2LDRBs, ARM::tLDRB, 0, 0, 0, 1, 0, 0,0, 1 }, @@ -113,7 +113,7 @@ namespace { { ARM::t2LDRHs, ARM::tLDRH, 0, 0, 0, 1, 0, 0,0, 1 }, { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 1 }, { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2STRi12,ARM::tSTR, 0, 5, 0, 1, 0, 0,0, 1 }, + { ARM::t2STRi12,ARM::tSTR, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 1 }, { ARM::t2STRs, ARM::tSTR, 0, 0, 0, 1, 0, 0,0, 1 }, { ARM::t2STRBi12,ARM::tSTRB, 0, 5, 0, 1, 0, 0,0, 1 }, { ARM::t2STRBs, ARM::tSTRB, 0, 0, 0, 1, 0, 0,0, 1 }, @@ -244,8 +244,13 @@ static bool VerifyLowRegs(MachineInstr *MI) { continue; if (isLROk && Reg == ARM::LR) continue; - if (isSPOk && Reg == ARM::SP) - continue; + if (Reg == ARM::SP) { + if (isSPOk) + continue; + if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12)) + // Special case for these ldr / str with sp as base register. + continue; + } if (!isARMLowRegister(Reg)) return false; } @@ -261,17 +266,26 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, unsigned Scale = 1; bool HasImmOffset = false; bool HasShift = false; + bool HasOffReg = true; bool isLdStMul = false; unsigned Opc = Entry.NarrowOpc1; unsigned OpNum = 3; // First 'rest' of operands. + uint8_t ImmLimit = Entry.Imm1Limit; switch (Entry.WideOpc) { default: llvm_unreachable("Unexpected Thumb2 load / store opcode!"); case ARM::t2LDRi12: - case ARM::t2STRi12: + case ARM::t2STRi12: { + unsigned BaseReg = MI->getOperand(1).getReg(); + if (BaseReg == ARM::SP) { + Opc = Entry.NarrowOpc2; + ImmLimit = Entry.Imm2Limit; + HasOffReg = false; + } Scale = 4; HasImmOffset = true; break; + } case ARM::t2LDRBi12: case ARM::t2STRBi12: HasImmOffset = true; @@ -325,7 +339,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, unsigned OffsetImm = 0; if (HasImmOffset) { OffsetImm = MI->getOperand(2).getImm(); - unsigned MaxOffset = ((1 << Entry.Imm1Limit) - 1) * Scale; + unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale; if ((OffsetImm & (Scale-1)) || OffsetImm > MaxOffset) // Make sure the immediate field fits. return false; @@ -337,7 +351,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc)); if (!isLdStMul) { MIB.addOperand(MI->getOperand(0)).addOperand(MI->getOperand(1)); - if (Entry.NarrowOpc1 != ARM::tLDRSB && Entry.NarrowOpc1 != ARM::tLDRSH) { + if (Opc != ARM::tLDRSB && Opc != ARM::tLDRSH) { // tLDRSB and tLDRSH do not have an immediate offset field. On the other // hand, it must have an offset register. // FIXME: Remove this special case. @@ -345,13 +359,17 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, } assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!"); - MIB.addReg(OffsetReg, getKillRegState(OffsetKill)); + if (HasOffReg) + MIB.addReg(OffsetReg, getKillRegState(OffsetKill)); } // Transfer the rest of operands. for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum) MIB.addOperand(MI->getOperand(OpNum)); + // Transfer memoperands. + (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase(MI); |