summaryrefslogtreecommitdiffstats
path: root/lib/Target/ARM
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--lib/Target/ARM/ARM.td18
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp3
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td4
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td25
-rw-r--r--lib/Target/ARM/ARMScheduleV6.td188
-rw-r--r--lib/Target/ARM/ARMScheduleV7.td8
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp13
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp5
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMInstPrinter.h3
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp34
10 files changed, 262 insertions, 39 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index cb9bd6a..7033861 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -89,16 +89,18 @@ def : ProcNoItin<"xscale", [ArchV5TE]>;
def : ProcNoItin<"iwmmxt", [ArchV5TE]>;
// V6 Processors.
-def : ProcNoItin<"arm1136j-s", [ArchV6]>;
-def : ProcNoItin<"arm1136jf-s", [ArchV6, FeatureVFP2]>;
-def : ProcNoItin<"arm1176jz-s", [ArchV6]>;
-def : ProcNoItin<"arm1176jzf-s", [ArchV6, FeatureVFP2]>;
-def : ProcNoItin<"mpcorenovfp", [ArchV6]>;
-def : ProcNoItin<"mpcore", [ArchV6, FeatureVFP2]>;
+def : Processor<"arm1136j-s", ARMV6Itineraries, [ArchV6]>;
+def : Processor<"arm1136jf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
+def : Processor<"arm1176jz-s", ARMV6Itineraries, [ArchV6]>;
+def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
+def : Processor<"mpcorenovfp", ARMV6Itineraries, [ArchV6]>;
+def : Processor<"mpcore", ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
// V6T2 Processors.
-def : ProcNoItin<"arm1156t2-s", [ArchV6T2, FeatureThumb2]>;
-def : ProcNoItin<"arm1156t2f-s", [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
+def : Processor<"arm1156t2-s", ARMV6Itineraries,
+ [ArchV6T2, FeatureThumb2]>;
+def : Processor<"arm1156t2f-s", ARMV6Itineraries,
+ [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
// V7 Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9be7454..696a8e1 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1545,7 +1545,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
InFlag = SDValue(ResNode, 1);
ReplaceUses(SDValue(Op.getNode(), 1), InFlag);
}
- ReplaceUses(SDValue(Op.getNode(), 0), SDValue(Chain.getNode(), Chain.getResNo()));
+ ReplaceUses(SDValue(Op.getNode(), 0),
+ SDValue(Chain.getNode(), Chain.getResNo()));
return NULL;
}
case ARMISD::CMOV: {
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 3fe634e..79bde29 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -608,11 +608,11 @@ def PICSTR : AXI2stw<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
[(store GPR:$src, addrmodepc:$addr)]>;
def PICSTRH : AXI3sth<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}h\t$src, $addr",
+ Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstrh${p}\t$src, $addr",
[(truncstorei16 GPR:$src, addrmodepc:$addr)]>;
def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}b\t$src, $addr",
+ Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstrb${p}\t$src, $addr",
[(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
}
} // isNotDuplicable = 1
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 2796364..d1831d1 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -66,6 +66,11 @@ def thumb_immshifted_shamt : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(V, MVT::i32);
}]>;
+// Scaled 4 immediate.
+def t_imm_s4 : Operand<i32> {
+ let PrintMethod = "printThumbS4ImmOperand";
+}
+
// Define Thumb specific addressing modes.
// t_addrmode_rr := reg + reg
@@ -134,20 +139,20 @@ def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr,
[(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>;
// PC relative add.
-def tADDrPCi : T1I<(outs tGPR:$dst), (ins i32imm:$rhs), IIC_iALUi,
- "add\t$dst, pc, $rhs * 4", []>;
+def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi,
+ "add\t$dst, pc, $rhs", []>;
// ADD rd, sp, #imm8
-def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), IIC_iALUi,
- "add\t$dst, $sp, $rhs * 4", []>;
+def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi,
+ "add\t$dst, $sp, $rhs", []>;
// ADD sp, sp, #imm7
-def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALUi,
- "add\t$dst, $rhs * 4", []>;
+def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
+ "add\t$dst, $rhs", []>;
// SUB sp, sp, #imm7
-def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALUi,
- "sub\t$dst, $rhs * 4", []>;
+def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
+ "sub\t$dst, $rhs", []>;
// ADD rm, sp
def tADDrSP : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
@@ -159,8 +164,8 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
// Pseudo instruction that will expand into a tSUBspi + a copy.
let usesCustomInserter = 1 in { // Expanded after instruction selection.
-def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
- NoItinerary, "@ sub\t$dst, $rhs * 4", []>;
+def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs),
+ NoItinerary, "@ sub\t$dst, $rhs", []>;
def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
NoItinerary, "@ add\t$dst, $rhs", []>;
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
index 1ace718..0fef466 100644
--- a/lib/Target/ARM/ARMScheduleV6.td
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -11,4 +11,190 @@
//
//===----------------------------------------------------------------------===//
-// TODO: Add model for an ARM11
+// Model based on ARM1176
+//
+// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual".
+//
+def ARMV6Itineraries : ProcessorItineraries<[
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [FU_Pipe0]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsr , [InstrStage<2, [FU_Pipe0]>], [3, 3, 2, 1]>,
+ //
+ // Unary Instructions that produce a result
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>,
+ InstrItinData<IIC_iUNAsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [FU_Pipe0]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>,
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [FU_Pipe0]>], [2]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>,
+ //
+ // Move instructions, conditional
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [FU_Pipe0]>], [3]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [FU_Pipe0]>], [3, 2]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [FU_Pipe0]>], [3, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1, 2]>,
+
+ // Integer load pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoadi , [InstrStage<1, [FU_Pipe0]>], [4, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoadr , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>,
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iLoadsi , [InstrStage<2, [FU_Pipe0]>], [5, 2, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoadiu , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoadru , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iLoadsiu , [InstrStage<2, [FU_Pipe0]>], [5, 2, 2, 1]>,
+
+ //
+ // Load multiple
+ InstrItinData<IIC_iLoadm , [InstrStage<3, [FU_Pipe0]>]>,
+
+ // Integer store pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStorei , [InstrStage<1, [FU_Pipe0]>], [2, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStorer , [InstrStage<1, [FU_Pipe0]>], [2, 1, 1]>,
+
+ //
+ // Scaled register offset, issues over 2 cycles
+ InstrItinData<IIC_iStoresi , [InstrStage<2, [FU_Pipe0]>], [2, 2, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStoreru , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1, 1]>,
+ //
+ // Scaled register offset with update, issues over 2 cycles
+ InstrItinData<IIC_iStoresiu, [InstrStage<2, [FU_Pipe0]>], [2, 2, 2, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStorem , [InstrStage<3, [FU_Pipe0]>]>,
+
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>,
+
+ // VFP
+ // Issue through integer pipeline, and execute in NEON unit. We assume
+ // RunFast mode so that NFP pipeline is used for single-precision when
+ // possible.
+ //
+ // FP Special Register to Integer Register File Move
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0]>], [3]>,
+ //
+ // Single-precision FP Unary
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>,
+ //
+ // Double-precision FP Unary
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>,
+ //
+ // Single-precision FP Compare
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
+ //
+ // Double-precision FP Compare
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>,
+ //
+ // Single to Double FP Convert
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0]>], [5, 2]>,
+ //
+ // Double to Single FP Convert
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0]>], [5, 2]>,
+ //
+ // Single-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>,
+ //
+ // Double-Precision FP to Integer Convert
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>,
+ //
+ // Integer to Single-Precision FP Convert
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0]>], [9, 2]>,
+ //
+ // Integer to Double-Precision FP Convert
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0]>], [9, 2]>,
+ //
+ // Single-precision FP ALU
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>,
+ //
+ // Double-precision FP ALU
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>,
+ //
+ // Single-precision FP Multiply
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>,
+ //
+ // Double-precision FP Multiply
+ InstrItinData<IIC_fpMUL64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2]>,
+ //
+ // Single-precision FP MAC
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2, 2]>,
+ //
+ // Double-precision FP MAC
+ InstrItinData<IIC_fpMAC64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2, 2]>,
+ //
+ // Single-precision FP DIV
+ InstrItinData<IIC_fpDIV32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>,
+ //
+ // Double-precision FP DIV
+ InstrItinData<IIC_fpDIV64 , [InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>,
+ //
+ // Single-precision FP SQRT
+ InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>,
+ //
+ // Double-precision FP SQRT
+ InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>,
+ //
+ // Single-precision FP Load
+ InstrItinData<IIC_fpLoad32 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>,
+ //
+ // Double-precision FP Load
+ InstrItinData<IIC_fpLoad64 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>,
+ //
+ // FP Load Multiple
+ InstrItinData<IIC_fpLoadm , [InstrStage<3, [FU_Pipe0]>]>,
+ //
+ // Single-precision FP Store
+ InstrItinData<IIC_fpStore32 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>,
+ //
+ // Double-precision FP Store
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStore64 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>,
+ //
+ // FP Store Multiple
+ InstrItinData<IIC_fpStorem , [InstrStage<3, [FU_Pipe0]>]>
+]>;
diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td
index e565813..427645c 100644
--- a/lib/Target/ARM/ARMScheduleV7.td
+++ b/lib/Target/ARM/ARMScheduleV7.td
@@ -184,7 +184,7 @@ def CortexA8Itineraries : ProcessorItineraries<[
//
// Single-precision FP Compare
InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [7, 1]>,
+ InstrStage<1, [FU_NPipe]>], [1, 1]>,
//
// Double-precision FP Compare
InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
@@ -221,7 +221,7 @@ def CortexA8Itineraries : ProcessorItineraries<[
//
// Single-precision FP ALU
InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [7, 1]>,
+ InstrStage<1, [FU_NPipe]>], [7, 1, 1]>,
//
// Double-precision FP ALU
InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
@@ -230,7 +230,7 @@ def CortexA8Itineraries : ProcessorItineraries<[
//
// Single-precision FP Multiply
InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [7, 1]>,
+ InstrStage<1, [FU_NPipe]>], [7, 1, 1]>,
//
// Double-precision FP Multiply
InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
@@ -239,7 +239,7 @@ def CortexA8Itineraries : ProcessorItineraries<[
//
// Single-precision FP MAC
InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
- InstrStage<1, [FU_NPipe]>], [7, 1]>,
+ InstrStage<1, [FU_NPipe]>], [7, 2, 1, 1]>,
//
// Double-precision FP MAC
InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 0352503..dd4a240 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -110,6 +110,7 @@ namespace {
const char *Modifier = 0);
void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum);
+ void printThumbS4ImmOperand(const MachineInstr *MI, int OpNum);
void printThumbITMask(const MachineInstr *MI, int OpNum);
void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNum);
void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNum,
@@ -674,6 +675,10 @@ ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op) {
//===--------------------------------------------------------------------===//
+void ARMAsmPrinter::printThumbS4ImmOperand(const MachineInstr *MI, int Op) {
+ O << "#" << MI->getOperand(Op).getImm() * 4;
+}
+
void
ARMAsmPrinter::printThumbITMask(const MachineInstr *MI, int Op) {
// (3 - the number of trailing zeros) is the number of then / else.
@@ -713,7 +718,7 @@ ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op,
if (MO3.getReg())
O << ", " << getRegisterName(MO3.getReg());
else if (unsigned ImmOffs = MO2.getImm())
- O << ", #" << ImmOffs * Scale;
+ O << ", #+" << ImmOffs * Scale;
O << "]";
}
@@ -735,7 +740,7 @@ void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) {
const MachineOperand &MO2 = MI->getOperand(Op+1);
O << "[" << getRegisterName(MO1.getReg());
if (unsigned ImmOffs = MO2.getImm())
- O << ", #" << ImmOffs << " * 4";
+ O << ", #+" << ImmOffs*4;
O << "]";
}
@@ -801,9 +806,9 @@ void ARMAsmPrinter::printT2AddrModeImm8s4Operand(const MachineInstr *MI,
int32_t OffImm = (int32_t)MO2.getImm() / 4;
// Don't print +0.
if (OffImm < 0)
- O << ", #-" << -OffImm << " * 4";
+ O << ", #-" << -OffImm * 4;
else if (OffImm > 0)
- O << ", #+" << OffImm << " * 4";
+ O << ", #+" << OffImm * 4;
O << "]";
}
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index 0047925..9fc57e0 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -351,3 +351,8 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum) {
// FIXME: remove this.
abort();
}
+
+void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum) {
+ // FIXME: remove this.
+ abort();
+}
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
index 9e7f8d5..23a7f05 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
@@ -52,7 +52,8 @@ public:
const char *Modifier = 0);
void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum);
-
+
+ void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum);
void printThumbITMask(const MCInst *MI, unsigned OpNum) {}
void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum) {}
void printThumbAddrModeRI5Operand(const MCInst *MI, unsigned OpNum,
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 9ce30aa..ad1739c 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -105,7 +105,7 @@ namespace {
// FIXME: Clean this up after splitting each Thumb load / store opcode
// into multiple ones.
- { ARM::t2LDRi12,ARM::tLDR, 0, 5, 0, 1, 0, 0,0, 1 },
+ { ARM::t2LDRi12,ARM::tLDR, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 1 },
{ ARM::t2LDRs, ARM::tLDR, 0, 0, 0, 1, 0, 0,0, 1 },
{ ARM::t2LDRBi12,ARM::tLDRB, 0, 5, 0, 1, 0, 0,0, 1 },
{ ARM::t2LDRBs, ARM::tLDRB, 0, 0, 0, 1, 0, 0,0, 1 },
@@ -113,7 +113,7 @@ namespace {
{ ARM::t2LDRHs, ARM::tLDRH, 0, 0, 0, 1, 0, 0,0, 1 },
{ ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 1 },
{ ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 1 },
- { ARM::t2STRi12,ARM::tSTR, 0, 5, 0, 1, 0, 0,0, 1 },
+ { ARM::t2STRi12,ARM::tSTR, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 1 },
{ ARM::t2STRs, ARM::tSTR, 0, 0, 0, 1, 0, 0,0, 1 },
{ ARM::t2STRBi12,ARM::tSTRB, 0, 5, 0, 1, 0, 0,0, 1 },
{ ARM::t2STRBs, ARM::tSTRB, 0, 0, 0, 1, 0, 0,0, 1 },
@@ -244,8 +244,13 @@ static bool VerifyLowRegs(MachineInstr *MI) {
continue;
if (isLROk && Reg == ARM::LR)
continue;
- if (isSPOk && Reg == ARM::SP)
- continue;
+ if (Reg == ARM::SP) {
+ if (isSPOk)
+ continue;
+ if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
+ // Special case for these ldr / str with sp as base register.
+ continue;
+ }
if (!isARMLowRegister(Reg))
return false;
}
@@ -261,17 +266,26 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
unsigned Scale = 1;
bool HasImmOffset = false;
bool HasShift = false;
+ bool HasOffReg = true;
bool isLdStMul = false;
unsigned Opc = Entry.NarrowOpc1;
unsigned OpNum = 3; // First 'rest' of operands.
+ uint8_t ImmLimit = Entry.Imm1Limit;
switch (Entry.WideOpc) {
default:
llvm_unreachable("Unexpected Thumb2 load / store opcode!");
case ARM::t2LDRi12:
- case ARM::t2STRi12:
+ case ARM::t2STRi12: {
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == ARM::SP) {
+ Opc = Entry.NarrowOpc2;
+ ImmLimit = Entry.Imm2Limit;
+ HasOffReg = false;
+ }
Scale = 4;
HasImmOffset = true;
break;
+ }
case ARM::t2LDRBi12:
case ARM::t2STRBi12:
HasImmOffset = true;
@@ -325,7 +339,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
unsigned OffsetImm = 0;
if (HasImmOffset) {
OffsetImm = MI->getOperand(2).getImm();
- unsigned MaxOffset = ((1 << Entry.Imm1Limit) - 1) * Scale;
+ unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
if ((OffsetImm & (Scale-1)) || OffsetImm > MaxOffset)
// Make sure the immediate field fits.
return false;
@@ -337,7 +351,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
if (!isLdStMul) {
MIB.addOperand(MI->getOperand(0)).addOperand(MI->getOperand(1));
- if (Entry.NarrowOpc1 != ARM::tLDRSB && Entry.NarrowOpc1 != ARM::tLDRSH) {
+ if (Opc != ARM::tLDRSB && Opc != ARM::tLDRSH) {
// tLDRSB and tLDRSH do not have an immediate offset field. On the other
// hand, it must have an offset register.
// FIXME: Remove this special case.
@@ -345,13 +359,17 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
}
assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
- MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
+ if (HasOffReg)
+ MIB.addReg(OffsetReg, getKillRegState(OffsetKill));
}
// Transfer the rest of operands.
for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
MIB.addOperand(MI->getOperand(OpNum));
+ // Transfer memoperands.
+ (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
MBB.erase(MI);
OpenPOWER on IntegriCloud