summaryrefslogtreecommitdiffstats
path: root/lib/Target/ARM
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--lib/Target/ARM/ARM.h2
-rw-r--r--lib/Target/ARM/ARMAddressingModes.h18
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp177
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h15
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp90
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.h2
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp9
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp143
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.cpp22
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.h4
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp115
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp63
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp153
-rw-r--r--lib/Target/ARM/ARMISelLowering.h17
-rw-r--r--lib/Target/ARM/ARMInstrInfo.cpp20
-rw-r--r--lib/Target/ARM/ARMInstrInfo.h9
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td135
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td35
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td10
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td10
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td263
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp99
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp13
-rw-r--r--lib/Target/ARM/ARMSubtarget.h15
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp19
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp54
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp7
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMInstPrinter.h4
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp1
-rw-r--r--lib/Target/ARM/CMakeLists.txt1
-rw-r--r--lib/Target/ARM/NEONMoveFix.cpp8
-rw-r--r--lib/Target/ARM/NEONPreAllocPass.cpp28
-rw-r--r--lib/Target/ARM/README-Thumb.txt8
-rw-r--r--lib/Target/ARM/README.txt7
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.cpp4
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.h2
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp7
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.h5
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp44
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp6
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.h2
-rw-r--r--lib/Target/ARM/Thumb2RegisterInfo.h5
42 files changed, 1087 insertions, 564 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 76cc06e..ff1980d 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -103,11 +103,13 @@ FunctionPass *createARMObjectCodeEmitterPass(ARMBaseTargetMachine &TM,
ObjectCodeEmitter &OCE);
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
+FunctionPass *createARMExpandPseudoPass();
FunctionPass *createARMConstantIslandPass();
FunctionPass *createNEONPreAllocPass();
FunctionPass *createNEONMoveFixPass();
FunctionPass *createThumb2ITBlockPass();
FunctionPass *createThumb2SizeReductionPass();
+FunctionPass *createARMMaxStackAlignmentCalculatorPass();
extern Target TheARMTarget, TheThumbTarget;
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index c603708..ddeb1b9 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -520,8 +520,8 @@ namespace ARM_AM {
return ((AM5Opc >> 8) & 1) ? sub : add;
}
- /// getAM5Opc - This function encodes the addrmode5 opc field for FLDM and
- /// FSTM instructions.
+ /// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and
+ /// VSTM instructions.
static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB,
unsigned char Offset) {
assert((SubMode == ia || SubMode == db) &&
@@ -541,13 +541,15 @@ namespace ARM_AM {
//
// This is used for NEON load / store instructions.
//
- // addrmode6 := reg with optional writeback
+ // addrmode6 := reg with optional writeback and alignment
//
- // This is stored in three operands [regaddr, regupdate, opc]. The first is
- // the address register. The second register holds the value of a post-access
- // increment for writeback or reg0 if no writeback or if the writeback
- // increment is the size of the memory access. The third operand encodes
- // whether there is writeback to the address register.
+ // This is stored in four operands [regaddr, regupdate, opc, align]. The
+ // first is the address register. The second register holds the value of
+ // a post-access increment for writeback or reg0 if no writeback or if the
+ // writeback increment is the size of the memory access. The third
+ // operand encodes whether there is writeback to the address register. The
+ // fourth operand is the value of the alignment specifier to use or zero if
+ // no explicit alignment.
static inline unsigned getAM6Opc(bool WB = false) {
return (int)WB;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 7c5b0f0..b50b609 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===//
+//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,11 +14,16 @@
#include "ARMBaseInstrInfo.h"
#include "ARM.h"
#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -504,9 +509,9 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
switch (MI.getOpcode()) {
default: break;
- case ARM::FCPYS:
- case ARM::FCPYD:
+ case ARM::VMOVS:
case ARM::VMOVD:
+ case ARM::VMOVDneon:
case ARM::VMOVQ: {
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
@@ -556,8 +561,8 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
return MI->getOperand(0).getReg();
}
break;
- case ARM::FLDD:
- case ARM::FLDS:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
if (MI->getOperand(1).isFI() &&
MI->getOperand(2).isImm() &&
MI->getOperand(2).getImm() == 0) {
@@ -595,8 +600,8 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return MI->getOperand(0).getReg();
}
break;
- case ARM::FSTD:
- case ARM::FSTS:
+ case ARM::VSTRD:
+ case ARM::VSTRS:
if (MI->getOperand(1).isFI() &&
MI->getOperand(2).isImm() &&
MI->getOperand(2).getImm() == 0) {
@@ -632,17 +637,17 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr),
DestReg).addReg(SrcReg)));
} else if (DestRC == ARM::SPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVS), DestReg)
.addReg(SrcReg));
} else if (DestRC == ARM::DPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg)
.addReg(SrcReg));
} else if (DestRC == ARM::DPR_VFP2RegisterClass ||
DestRC == ARM::DPR_8RegisterClass ||
SrcRC == ARM::DPR_VFP2RegisterClass ||
SrcRC == ARM::DPR_8RegisterClass) {
// Always use neon reg-reg move if source or dest is NEON-only regclass.
- BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg).addReg(SrcReg);
+ BuildMI(MBB, I, DL, get(ARM::VMOVDneon), DestReg).addReg(SrcReg);
} else if (DestRC == ARM::QPRRegisterClass ||
DestRC == ARM::QPR_VFP2RegisterClass ||
DestRC == ARM::QPR_8RegisterClass) {
@@ -662,12 +667,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
MachineMemOperand::MOStore, 0,
MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ Align);
if (RC == ARM::GPRRegisterClass) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
@@ -676,19 +682,27 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else if (RC == ARM::DPRRegisterClass ||
RC == ARM::DPR_VFP2RegisterClass ||
RC == ARM::DPR_8RegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (RC == ARM::SPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTS))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else {
assert((RC == ARM::QPRRegisterClass ||
RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!");
// FIXME: Neon instructions should support predicates
- BuildMI(MBB, I, DL, get(ARM::VSTRQ)).addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ if (Align >= 16
+ && (getRegisterInfo().needsStackRealignment(MF))) {
+ BuildMI(MBB, I, DL, get(ARM::VST1q64))
+ .addFrameIndex(FI).addImm(0).addImm(0).addImm(128).addMemOperand(MMO)
+ .addReg(SrcReg, getKillRegState(isKill));
+ } else {
+ BuildMI(MBB, I, DL, get(ARM::VSTRQ)).
+ addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ }
}
}
@@ -700,12 +714,13 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
MachineMemOperand::MOLoad, 0,
MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ Align);
if (RC == ARM::GPRRegisterClass) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
@@ -713,18 +728,24 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else if (RC == ARM::DPRRegisterClass ||
RC == ARM::DPR_VFP2RegisterClass ||
RC == ARM::DPR_8RegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (RC == ARM::SPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDS), DestReg)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else {
assert((RC == ARM::QPRRegisterClass ||
RC == ARM::QPR_VFP2RegisterClass ||
RC == ARM::QPR_8RegisterClass) && "Unknown regclass!");
// FIXME: Neon instructions should support predicates
- BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0).
- addMemOperand(MMO);
+ if (Align >= 16
+ && (getRegisterInfo().needsStackRealignment(MF))) {
+ BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
+ .addFrameIndex(FI).addImm(0).addImm(0).addImm(128).addMemOperand(MMO);
+ } else {
+ BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0).
+ addMemOperand(MMO);
+ }
}
}
@@ -805,7 +826,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
DstSubReg)
.addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0);
}
- } else if (Opc == ARM::FCPYS) {
+ } else if (Opc == ARM::VMOVS) {
unsigned Pred = MI->getOperand(2).getImm();
unsigned PredReg = MI->getOperand(3).getReg();
if (OpNum == 0) { // move -> store
@@ -813,7 +834,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned SrcSubReg = MI->getOperand(1).getSubReg();
bool isKill = MI->getOperand(1).isKill();
bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS))
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRS))
.addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef),
SrcSubReg)
.addFrameIndex(FI)
@@ -823,7 +844,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned DstSubReg = MI->getOperand(0).getSubReg();
bool isDead = MI->getOperand(0).isDead();
bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS))
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRS))
.addReg(DstReg,
RegState::Define |
getDeadRegState(isDead) |
@@ -832,7 +853,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
.addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
}
}
- else if (Opc == ARM::FCPYD) {
+ else if (Opc == ARM::VMOVD) {
unsigned Pred = MI->getOperand(2).getImm();
unsigned PredReg = MI->getOperand(3).getReg();
if (OpNum == 0) { // move -> store
@@ -840,7 +861,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned SrcSubReg = MI->getOperand(1).getSubReg();
bool isKill = MI->getOperand(1).isKill();
bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD))
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRD))
.addReg(SrcReg,
getKillRegState(isKill) | getUndefRegState(isUndef),
SrcSubReg)
@@ -850,7 +871,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned DstSubReg = MI->getOperand(0).getSubReg();
bool isDead = MI->getOperand(0).isDead();
bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD))
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRD))
.addReg(DstReg,
RegState::Define |
getDeadRegState(isDead) |
@@ -886,15 +907,114 @@ ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
Opc == ARM::tMOVtgpr2gpr ||
Opc == ARM::tMOVgpr2tgpr) {
return true;
- } else if (Opc == ARM::FCPYS || Opc == ARM::FCPYD) {
+ } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD) {
return true;
- } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVQ) {
+ } else if (Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) {
return false; // FIXME
}
return false;
}
+void ARMBaseInstrInfo::
+reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc dl = Orig->getDebugLoc();
+
+ if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
+ DestReg = TRI->getSubReg(DestReg, SubIdx);
+ SubIdx = 0;
+ }
+
+ unsigned Opcode = Orig->getOpcode();
+ switch (Opcode) {
+ default: {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->getOperand(0).setReg(DestReg);
+ MBB.insert(I, MI);
+ break;
+ }
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ MachineConstantPool *MCP = MF.getConstantPool();
+ unsigned CPI = Orig->getOperand(1).getIndex();
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
+ assert(MCPE.isMachineConstantPoolEntry() &&
+ "Expecting a machine constantpool entry!");
+ ARMConstantPoolValue *ACPV =
+ static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
+ unsigned PCLabelId = AFI->createConstPoolEntryUId();
+ ARMConstantPoolValue *NewCPV = 0;
+ if (ACPV->isGlobalValue())
+ NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
+ ARMCP::CPValue, 4);
+ else if (ACPV->isExtSymbol())
+ NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(),
+ ACPV->getSymbol(), PCLabelId, 4);
+ else if (ACPV->isBlockAddress())
+ NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
+ ARMCP::CPBlockAddress, 4);
+ else
+ llvm_unreachable("Unexpected ARM constantpool value type!!");
+ CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
+ MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
+ DestReg)
+ .addConstantPoolIndex(CPI).addImm(PCLabelId);
+ (*MIB).setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
+ break;
+ }
+ }
+
+ MachineInstr *NewMI = prior(I);
+ NewMI->getOperand(0).setSubReg(SubIdx);
+}
+
+bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
+ int Opcode = MI0->getOpcode();
+ if (Opcode == ARM::t2LDRpci_pic || Opcode == ARM::tLDRpci_pic) {
+ if (MI1->getOpcode() != Opcode)
+ return false;
+ if (MI0->getNumOperands() != MI1->getNumOperands())
+ return false;
+
+ const MachineOperand &MO0 = MI0->getOperand(1);
+ const MachineOperand &MO1 = MI1->getOperand(1);
+ if (MO0.getOffset() != MO1.getOffset())
+ return false;
+
+ const MachineFunction *MF = MI0->getParent()->getParent();
+ const MachineConstantPool *MCP = MF->getConstantPool();
+ int CPI0 = MO0.getIndex();
+ int CPI1 = MO1.getIndex();
+ const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
+ const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
+ ARMConstantPoolValue *ACPV0 =
+ static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
+ ARMConstantPoolValue *ACPV1 =
+ static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
+ return ACPV0->hasSameValue(ACPV1);
+ }
+
+ return TargetInstrInfoImpl::isIdentical(MI0, MI1, MRI);
+}
+
+unsigned ARMBaseInstrInfo::TailDuplicationLimit(const MachineBasicBlock &MBB,
+ unsigned DefaultLimit) const {
+ // If the target processor can predict indirect branches, it is highly
+ // desirable to duplicate them, since it can often make them predictable.
+ if (!MBB.empty() && isIndirectBranchOpcode(MBB.back().getOpcode()) &&
+ getSubtarget().hasBranchTargetBuffer())
+ return DefaultLimit + 2;
+ return DefaultLimit;
+}
+
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
@@ -1022,6 +1142,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
break;
}
case ARMII::AddrMode4:
+ case ARMII::AddrMode6:
// Can't fold any offset even if it's zero.
return false;
case ARMII::AddrMode5: {
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 2ba3774..73e854f 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -1,4 +1,4 @@
-//===- ARMBaseInstrInfo.h - ARM Base Instruction Information -------------*- C++ -*-===//
+//===- ARMBaseInstrInfo.h - ARM Base Instruction Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -261,9 +261,20 @@ public:
virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
+ const SmallVectorImpl<unsigned> &Ops,
MachineInstr* LoadMI) const;
+ virtual void reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual bool isIdentical(const MachineInstr *MI, const MachineInstr *Other,
+ const MachineRegisterInfo *MRI) const;
+
+ virtual unsigned TailDuplicationLimit(const MachineBasicBlock &MBB,
+ unsigned DefaultLimit) const;
};
static inline
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 70377f9e..19762ee 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -44,10 +44,6 @@ static cl::opt<bool>
ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(true),
cl::desc("Reuse repeated frame index values"));
-static cl::opt<bool>
-ARMDynamicStackAlign("arm-dynamic-stack-alignment", cl::Hidden, cl::init(false),
- cl::desc("Dynamically re-align the stack as needed"));
-
unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
bool *isSPVFP) {
if (isSPVFP)
@@ -476,11 +472,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
}
static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
- // FIXME: For now, force at least 128-bit alignment. This will push the
- // nightly tester harder for making sure things work correctly. When
- // we're ready to enable this for real, this goes back to starting at zero.
- unsigned MaxAlign = 16;
-// unsigned MaxAlign = 0;
+ unsigned MaxAlign = 0;
for (int i = FFI->getObjectIndexBegin(),
e = FFI->getObjectIndexEnd(); i != e; ++i) {
@@ -508,20 +500,12 @@ bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
needsStackRealignment(const MachineFunction &MF) const {
- // Only do this for ARM if explicitly enabled
- // FIXME: Once it's passing all the tests, enable by default
- if (!ARMDynamicStackAlign)
- return false;
-
- // FIXME: To force more brutal testing, realign whether we need to or not.
- // Change this to be more selective when we turn it on for real, of course.
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-// unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
return (RealignStack &&
!AFI->isThumb1OnlyFunction() &&
- AFI->hasStackFrame() &&
-// (MFI->getMaxAlignment() > StackAlign) &&
+ (MFI->getMaxAlignment() > StackAlign) &&
!MFI->hasVarSizedObjects());
}
@@ -529,7 +513,8 @@ bool ARMBaseRegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const
const MachineFrameInfo *MFI = MF.getFrameInfo();
if (NoFramePointerElim && MFI->hasCalls())
return true;
- return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
+ return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
+ || needsStackRealignment(MF);
}
/// estimateStackSize - Estimate and return the size of the frame.
@@ -604,7 +589,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Calculate and set max stack object alignment early, so we can decide
// whether we will need stack realignment (and thus FP).
- if (ARMDynamicStackAlign) {
+ if (RealignStack) {
unsigned MaxAlign = std::max(MFI->getMaxAlignment(),
calculateMaxStackAlignment(MFI));
MFI->setMaxAlignment(MaxAlign);
@@ -789,7 +774,8 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Reserve a slot closest to SP or frame pointer.
const TargetRegisterClass *RC = ARM::GPRRegisterClass;
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment()));
+ RC->getAlignment(),
+ false));
}
}
}
@@ -806,7 +792,8 @@ unsigned ARMBaseRegisterInfo::getRARegister() const {
return ARM::LR;
}
-unsigned ARMBaseRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned
+ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
if (STI.isTargetDarwin() || hasFP(MF))
return FramePtr;
return ARM::SP;
@@ -1183,7 +1170,8 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// as much as possible above, handle the rest, providing a register that is
// SP+LargeImm.
assert((Offset ||
- (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4) &&
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6) &&
"This code isn't needed if offset already handled!");
unsigned ScratchReg = 0;
@@ -1192,7 +1180,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
if (Offset == 0)
- // Must be addrmode4.
+ // Must be addrmode4/6.
MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
else {
ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass);
@@ -1346,7 +1334,7 @@ emitPrologue(MachineFunction &MF) const {
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
- movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 0, 3, STI);
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::VSTRD, 0, 3, STI);
NumBytes = DPRCSOffset;
if (NumBytes) {
// Adjust SP after all the callee-save spills.
@@ -1385,7 +1373,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
static bool isCSRestore(MachineInstr *MI,
const ARMBaseInstrInfo &TII,
const unsigned *CSRegs) {
- return ((MI->getOpcode() == (int)ARM::FLDD ||
+ return ((MI->getOpcode() == (int)ARM::VLDRD ||
MI->getOpcode() == (int)ARM::LDR ||
MI->getOpcode() == (int)ARM::t2LDRi12) &&
MI->getOperand(1).isFI() &&
@@ -1411,7 +1399,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
if (NumBytes != 0)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
} else {
- // Unwind MBBI to point to first LDR / FLDD.
+ // Unwind MBBI to point to first LDR / VLDRD.
const unsigned *CSRegs = getCalleeSavedRegs();
if (MBBI != MBB.begin()) {
do
@@ -1459,7 +1447,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
// Move SP to start of integer callee save spill area 2.
- movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 0, 3, STI);
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::VLDRD, 0, 3, STI);
emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedAreaSize());
// Move SP to start of integer callee save spill area 1.
@@ -1475,4 +1463,48 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
}
+namespace {
+ struct MaximalStackAlignmentCalculator : public MachineFunctionPass {
+ static char ID;
+ MaximalStackAlignmentCalculator() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ MachineRegisterInfo &RI = MF.getRegInfo();
+
+ // Calculate max stack alignment of all already allocated stack objects.
+ unsigned MaxAlign = calculateMaxStackAlignment(FFI);
+
+ // Be over-conservative: scan over all vreg defs and find, whether vector
+ // registers are used. If yes - there is probability, that vector register
+ // will be spilled and thus stack needs to be aligned properly.
+ for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
+ RegNum < RI.getLastVirtReg(); ++RegNum)
+ MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
+
+ if (FFI->getMaxAlignment() == MaxAlign)
+ return false;
+
+ FFI->setMaxAlignment(MaxAlign);
+ return true;
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM Stack Required Alignment Auto-Detector";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+
+ char MaximalStackAlignmentCalculator::ID = 0;
+}
+
+FunctionPass*
+llvm::createARMMaxStackAlignmentCalculatorPass() {
+ return new MaximalStackAlignmentCalculator();
+}
+
#include "ARMGenRegisterInfo.inc"
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 029e468..4b267b0 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -105,7 +105,7 @@ public:
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 13cf676..766acff 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -168,7 +168,8 @@ namespace {
/// Routines that handle operands which add machine relocations which are
/// fixed up by the relocation stage.
void emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
- bool NeedStub, bool Indirect, intptr_t ACPV = 0);
+ bool MayNeedFarStub, bool Indirect,
+ intptr_t ACPV = 0);
void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
void emitConstPoolAddress(unsigned CPI, unsigned Reloc);
void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc);
@@ -277,13 +278,13 @@ unsigned Emitter<CodeEmitter>::getMachineOpValue(const MachineInstr &MI,
///
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
- bool NeedStub, bool Indirect,
+ bool MayNeedFarStub, bool Indirect,
intptr_t ACPV) {
MachineRelocation MR = Indirect
? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
- GV, ACPV, NeedStub)
+ GV, ACPV, MayNeedFarStub)
: MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- GV, ACPV, NeedStub);
+ GV, ACPV, MayNeedFarStub);
MCE.addRelocation(MR);
}
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 9819625..d22c43a 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -31,6 +31,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
#include <algorithm>
using namespace llvm;
@@ -42,6 +43,13 @@ STATISTIC(NumTBs, "Number of table branches generated");
STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk");
STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
+STATISTIC(NumJTMoved, "Number of jump table destination blocks moved");
+STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
+
+
+static cl::opt<bool>
+AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
+ cl::desc("Adjust basic block layout to better use TB[BH]"));
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
@@ -174,6 +182,7 @@ namespace {
void DoInitialPlacement(MachineFunction &MF,
std::vector<MachineInstr*> &CPEMIs);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
+ void JumpTableFunctionScan(MachineFunction &MF);
void InitialFunctionScan(MachineFunction &MF,
const std::vector<MachineInstr*> &CPEMIs);
MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
@@ -201,7 +210,10 @@ namespace {
bool UndoLRSpillRestore();
bool OptimizeThumb2Instructions(MachineFunction &MF);
bool OptimizeThumb2Branches(MachineFunction &MF);
+ bool ReorderThumb2JumpTables(MachineFunction &MF);
bool OptimizeThumb2JumpTables(MachineFunction &MF);
+ MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB,
+ MachineBasicBlock *JTBB);
unsigned GetOffsetOf(MachineInstr *MI) const;
void dumpBBs();
@@ -262,6 +274,18 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// the numbers agree with the position of the block in the function.
MF.RenumberBlocks();
+ // Try to reorder and otherwise adjust the block layout to make good use
+ // of the TB[BH] instructions.
+ bool MadeChange = false;
+ if (isThumb2 && AdjustJumpTableBlocks) {
+ JumpTableFunctionScan(MF);
+ MadeChange |= ReorderThumb2JumpTables(MF);
+ // Data is out of date, so clear it. It'll be re-computed later.
+ T2JumpTables.clear();
+ // Blocks may have shifted around. Keep the numbering up to date.
+ MF.RenumberBlocks();
+ }
+
// Thumb1 functions containing constant pools get 4-byte alignment.
// This is so we can keep exact track of where the alignment padding goes.
@@ -292,7 +316,6 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// Iteratively place constant pool entries and fix up branches until there
// is no change.
- bool MadeChange = false;
unsigned NoCPIters = 0, NoBRIters = 0;
while (true) {
bool CPChange = false;
@@ -409,6 +432,21 @@ ARMConstantIslands::CPEntry
return NULL;
}
+/// JumpTableFunctionScan - Do a scan of the function, building up
+/// information about the sizes of each block and the locations of all
+/// the jump tables.
+void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I)
+ if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT)
+ T2JumpTables.push_back(I);
+ }
+}
+
/// InitialFunctionScan - Do the initial scan of the function, building up
/// information about the sizes of each block, the location of all the water,
/// and finding all of the constant pool users.
@@ -541,8 +579,8 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
Scale = 4; // +(offset_8*4)
break;
- case ARM::FLDD:
- case ARM::FLDS:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
Bits = 8;
Scale = 4; // +-(offset_8*4)
NegOk = true;
@@ -1552,7 +1590,6 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
return MadeChange;
}
-
/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
@@ -1560,7 +1597,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
// FIXME: After the tables are shrunk, can we get rid some of the
// constantpool tables?
- const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
MachineInstr *MI = T2JumpTables[i];
@@ -1660,3 +1697,99 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
return MadeChange;
}
+
+/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that
+/// jump tables always branch forwards, since that's what tbb and tbh need.
+bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
+ MachineInstr *MI = T2JumpTables[i];
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned NumOps = TID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2);
+ MachineOperand JTOP = MI->getOperand(JTOpIdx);
+ unsigned JTI = JTOP.getIndex();
+ assert(JTI < JT.size());
+
+ // We prefer if target blocks for the jump table come after the jump
+ // instruction so we can use TB[BH]. Loop through the target blocks
+ // and try to adjust them such that that's true.
+ int JTNumber = MI->getParent()->getNumber();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *MBB = JTBBs[j];
+ int DTNumber = MBB->getNumber();
+
+ if (DTNumber < JTNumber) {
+ // The destination precedes the switch. Try to move the block forward
+ // so we have a positive offset.
+ MachineBasicBlock *NewBB =
+ AdjustJTTargetBlockForward(MBB, MI->getParent());
+ if (NewBB)
+ MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB);
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+MachineBasicBlock *ARMConstantIslands::
+AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
+{
+ MachineFunction &MF = *BB->getParent();
+
+ // If it's the destination block is terminated by an unconditional branch,
+ // try to move it; otherwise, create a new block following the jump
+ // table that branches back to the actual target. This is a very simple
+ // heuristic. FIXME: We can definitely improve it.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ SmallVector<MachineOperand, 4> CondPrior;
+ MachineFunction::iterator BBi = BB;
+ MachineFunction::iterator OldPrior = prior(BBi);
+
+ // If the block terminator isn't analyzable, don't try to move the block
+ bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond);
+
+ // If the block ends in an unconditional branch, move it. The prior block
+ // has to have an analyzable terminator for us to move this one. Be paranoid
+ // and make sure we're not trying to move the entry block of the function.
+ if (!B && Cond.empty() && BB != MF.begin() &&
+ !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
+ BB->moveAfter(JTBB);
+ OldPrior->updateTerminator();
+ BB->updateTerminator();
+ // Update numbering to account for the block being moved.
+ MF.RenumberBlocks();
+ ++NumJTMoved;
+ return NULL;
+ }
+
+ // Create a new MBB for the code after the jump BB.
+ MachineBasicBlock *NewBB =
+ MF.CreateMachineBasicBlock(JTBB->getBasicBlock());
+ MachineFunction::iterator MBBI = JTBB; ++MBBI;
+ MF.insert(MBBI, NewBB);
+
+ // Add an unconditional branch from NewBB to BB.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond directly to anything in the source.
+ assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?");
+ BuildMI(NewBB, DebugLoc::getUnknownLoc(), TII->get(ARM::t2B)).addMBB(BB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ MF.RenumberBlocks(NewBB);
+
+ // Update the CFG.
+ NewBB->addSuccessor(BB);
+ JTBB->removeSuccessor(BB);
+ JTBB->addSuccessor(NewBB);
+
+ ++NumJTInserted;
+ return NewBB;
+}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index efa941a..90dd0c7 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -62,9 +62,10 @@ int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
ARMConstantPoolValue *CPV =
(ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
if (CPV->CVal == CVal &&
- CPV->S == S &&
CPV->LabelId == LabelId &&
- CPV->PCAdjust == PCAdjust)
+ CPV->PCAdjust == PCAdjust &&
+ (CPV->S == S || strcmp(CPV->S, S) == 0) &&
+ (CPV->Modifier == Modifier || strcmp(CPV->Modifier, Modifier) == 0))
return i;
}
}
@@ -84,6 +85,23 @@ ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
ID.AddInteger(PCAdjust);
}
+bool
+ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) {
+ if (ACPV->Kind == Kind &&
+ ACPV->CVal == CVal &&
+ ACPV->PCAdjust == PCAdjust &&
+ (ACPV->S == S || strcmp(ACPV->S, S) == 0) &&
+ (ACPV->Modifier == Modifier || strcmp(ACPV->Modifier, Modifier) == 0)) {
+ if (ACPV->LabelId == LabelId)
+ return true;
+ // Two PC relative constpool entries containing the same GV address or
+ // external symbols. FIXME: What about blockaddress?
+ if (Kind == ARMCP::CPValue || Kind == ARMCP::CPExtSymbol)
+ return true;
+ }
+ return false;
+}
+
void ARMConstantPoolValue::dump() const {
errs() << " " << *this;
}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 8fb3f92..741acde 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -81,6 +81,10 @@ public:
virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID);
+ /// hasSameValue - Return true if this ARM constpool value
+ /// can share the same constantpool entry as another ARM constpool value.
+ bool hasSameValue(ARMConstantPoolValue *ACPV);
+
void print(raw_ostream *O) const { if (O) print(*O); }
void print(raw_ostream &O) const;
void dump() const;
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
new file mode 100644
index 0000000..4d0f899
--- /dev/null
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -0,0 +1,115 @@
+//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expand pseudo instructions into target
+// instructions to allow proper scheduling, if-conversion, and other late
+// optimizations. This pass should be run after register allocation but before
+// post- regalloc scheduling pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-pseudo"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+namespace {
+ class ARMExpandPseudo : public MachineFunctionPass {
+ public:
+ static char ID;
+ ARMExpandPseudo() : MachineFunctionPass(&ID) {}
+
+ const TargetInstrInfo *TII;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM pseudo instruction expansion pass";
+ }
+
+ private:
+ bool ExpandMBB(MachineBasicBlock &MBB);
+ };
+ char ARMExpandPseudo::ID = 0;
+}
+
+bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock::iterator NMBBI = next(MBBI);
+
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default: break;
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
+ ? ARM::tLDRpci : ARM::t2LDRpci;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (!MI.getOperand(0).isDead()) {
+ MachineInstr *NewMI =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(NewLdOpc), DstReg)
+ .addOperand(MI.getOperand(1)));
+ NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
+ .addReg(DstReg, getDefRegState(true))
+ .addReg(DstReg)
+ .addOperand(MI.getOperand(2));
+ }
+ MI.eraseFromParent();
+ Modified = true;
+ break;
+ }
+ case ARM::t2MOVi32imm: {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned Imm = MI.getOperand(1).getImm();
+ unsigned Lo16 = Imm & 0xffff;
+ unsigned Hi16 = (Imm >> 16) & 0xffff;
+ if (!MI.getOperand(0).isDead()) {
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::t2MOVi16), DstReg)
+ .addImm(Lo16));
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::t2MOVTi16))
+ .addReg(DstReg, getDefRegState(true))
+ .addReg(DstReg).addImm(Hi16));
+ }
+ MI.eraseFromParent();
+ Modified = true;
+ }
+ // FIXME: expand t2MOVi32imm
+ }
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
+ ++MFI)
+ Modified |= ExpandMBB(*MFI);
+ return Modified;
+}
+
+/// createARMExpandPseudoPass - returns an instance of the pseudo instruction
+/// expansion pass.
+FunctionPass *llvm::createARMExpandPseudoPass() {
+ return new ARMExpandPseudo();
+}
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 1489cab..9be7454 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -81,7 +81,7 @@ public:
bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base,
SDValue &Offset);
bool SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update,
- SDValue &Opc);
+ SDValue &Opc, SDValue &Align);
bool SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset,
SDValue &Label);
@@ -187,8 +187,6 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
void ARMDAGToDAGISel::InstructionSelect() {
- DEBUG(BB->dump());
-
SelectRoot(*CurDAG);
CurDAG->RemoveDeadNodes();
}
@@ -491,11 +489,13 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N,
bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N,
SDValue &Addr, SDValue &Update,
- SDValue &Opc) {
+ SDValue &Opc, SDValue &Align) {
Addr = N;
// Default to no writeback.
Update = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32);
+ // Default to no alignment.
+ Align = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
@@ -1010,8 +1010,8 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
- SDValue MemAddr, MemUpdate, MemOpc;
- if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+ SDValue MemAddr, MemUpdate, MemOpc, Align;
+ if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@@ -1036,10 +1036,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
if (is64BitVector) {
unsigned Opc = DOpcodes[OpcodeIndex];
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
+ const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
std::vector<EVT> ResTys(NumVecs, VT);
ResTys.push_back(MVT::Other);
- return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4);
+ return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
}
EVT RegVT = GetNEONSubregVT(VT);
@@ -1047,10 +1047,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
// Quad registers are directly supported for VLD2,
// loading 2 pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
+ const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
std::vector<EVT> ResTys(4, VT);
ResTys.push_back(MVT::Other);
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
Chain = SDValue(VLd, 4);
// Combine the even and odd subregs to produce the result.
@@ -1071,14 +1071,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
// Load the even subregs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Chain };
- SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 4);
+ const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
+ SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 5);
Chain = SDValue(VLdA, NumVecs+1);
// Load the odd subregs.
Opc = QOpcodes1[OpcodeIndex];
- const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, Chain };
- SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 4);
+ const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc,
+ Align, Chain };
+ SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 5);
Chain = SDValue(VLdB, NumVecs+1);
// Combine the even and odd subregs to produce the result.
@@ -1098,8 +1099,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
- SDValue MemAddr, MemUpdate, MemOpc;
- if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+ SDValue MemAddr, MemUpdate, MemOpc, Align;
+ if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@@ -1126,13 +1127,14 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
Ops.push_back(MemAddr);
Ops.push_back(MemUpdate);
Ops.push_back(MemOpc);
+ Ops.push_back(Align);
if (is64BitVector) {
unsigned Opc = DOpcodes[OpcodeIndex];
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
Ops.push_back(N->getOperand(Vec+3));
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+4);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
}
EVT RegVT = GetNEONSubregVT(VT);
@@ -1147,7 +1149,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
N->getOperand(Vec+3)));
}
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 8);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9);
}
// Otherwise, quad registers are stored with two separate instructions,
@@ -1163,18 +1165,18 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
Ops.push_back(Chain);
unsigned Opc = QOpcodes0[OpcodeIndex];
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+4);
+ MVT::Other, Ops.data(), NumVecs+5);
Chain = SDValue(VStA, 1);
// Store the odd subregs.
Ops[0] = SDValue(VStA, 0); // MemAddr
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+ Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
N->getOperand(Vec+3));
- Ops[NumVecs+3] = Chain;
+ Ops[NumVecs+4] = Chain;
Opc = QOpcodes1[OpcodeIndex];
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+4);
+ MVT::Other, Ops.data(), NumVecs+5);
Chain = SDValue(VStB, 1);
ReplaceUses(SDValue(N, 0), Chain);
return NULL;
@@ -1188,8 +1190,8 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
- SDValue MemAddr, MemUpdate, MemOpc;
- if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+ SDValue MemAddr, MemUpdate, MemOpc, Align;
+ if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@@ -1226,6 +1228,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
Ops.push_back(MemAddr);
Ops.push_back(MemUpdate);
Ops.push_back(MemOpc);
+ Ops.push_back(Align);
unsigned Opc = 0;
if (is64BitVector) {
@@ -1463,8 +1466,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
}
break;
}
- case ARMISD::FMRRD:
- return CurDAG->getMachineNode(ARM::FMRRD, dl, MVT::i32, MVT::i32,
+ case ARMISD::VMOVRRD:
+ return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
Op.getOperand(0), getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32));
case ISD::UMUL_LOHI: {
@@ -1653,10 +1656,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
: ARM::MOVCCr;
break;
case MVT::f32:
- Opc = ARM::FCPYScc;
+ Opc = ARM::VMOVScc;
break;
case MVT::f64:
- Opc = ARM::FCPYDcc;
+ Opc = ARM::VMOVDcc;
break;
}
return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5);
@@ -1680,10 +1683,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
default: assert(false && "Illegal conditional move type!");
break;
case MVT::f32:
- Opc = ARM::FNEGScc;
+ Opc = ARM::VNEGScc;
break;
case MVT::f64:
- Opc = ARM::FNEGDcc;
+ Opc = ARM::VNEGDcc;
break;
}
return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index b6ce5dd..c3af8e6 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -133,7 +133,7 @@ static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
}
ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
- : TargetLowering(TM, createTLOF(TM)), ARMPCLabelIndex(0) {
+ : TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
if (Subtarget->isTargetDarwin()) {
@@ -389,7 +389,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
- // Turn f64->i64 into FMRRD, i64 -> f64 to FMDRR iff target supports vfp2.
+ // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR iff target supports vfp2.
setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
// We want to custom lower some of our intrinsics.
@@ -434,7 +434,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
// We have target-specific dag combine patterns for the following nodes:
- // ARMISD::FMRRD - No need to call setTargetDAGCombine
+ // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SUB);
@@ -493,8 +493,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
- case ARMISD::FMRRD: return "ARMISD::FMRRD";
- case ARMISD::FMDRR: return "ARMISD::FMDRR";
+ case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
+ case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
@@ -790,7 +790,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
- Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
+ Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
if (VA.getLocVT() == MVT::v2f64) {
SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
@@ -805,7 +805,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
- Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
+ Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
DAG.getConstant(1, MVT::i32));
}
@@ -870,7 +870,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
SmallVector<SDValue, 8> &MemOpChains,
ISD::ArgFlagsTy Flags) {
- SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
+ SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Arg);
RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
@@ -1004,6 +1004,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
bool isDirect = false;
bool isARMFunc = false;
bool isLocalARMFunc = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
GlobalValue *GV = G->getGlobal();
isDirect = true;
@@ -1015,6 +1017,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
isLocalARMFunc = !Subtarget->isThumb() && !isExt;
// tBX takes a register source operand.
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
ARMPCLabelIndex,
ARMCP::CPValue, 4);
@@ -1023,7 +1026,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
PseudoSourceValue::getConstantPool(), 0);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
} else
@@ -1036,6 +1039,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// tBX takes a register source operand.
const char *Sym = S->getSymbol();
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
Sym, ARMPCLabelIndex, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
@@ -1043,7 +1047,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
PseudoSourceValue::getConstantPool(), 0);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
} else
@@ -1145,7 +1149,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
// Extract the first half and return it in two registers.
SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
DAG.getConstant(0, MVT::i32));
- SDValue HalfGPRs = DAG.getNode(ARMISD::FMRRD, dl,
+ SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Half);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
@@ -1162,7 +1166,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
}
// Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
// available.
- SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
+ SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
Flag = Chain.getValue(1);
@@ -1208,6 +1212,9 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
}
SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = 0;
DebugLoc DL = Op.getDebugLoc();
EVT PtrVT = getPointerTy();
BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
@@ -1217,6 +1224,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
} else {
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMPCLabelIndex = AFI->createConstPoolEntryUId();
ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
ARMCP::CPBlockAddress,
PCAdj);
@@ -1227,7 +1235,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
PseudoSourceValue::getConstantPool(), 0);
if (RelocM == Reloc::Static)
return Result;
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}
@@ -1238,6 +1246,9 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
DebugLoc dl = GA->getDebugLoc();
EVT PtrVT = getPointerTy();
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
ARMConstantPoolValue *CPV =
new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
ARMCP::CPValue, PCAdj, "tlsgd", true);
@@ -1247,7 +1258,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
PseudoSourceValue::getConstantPool(), 0);
SDValue Chain = Argument.getValue(1);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
// call __tls_get_addr.
@@ -1279,7 +1290,10 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
if (GV->isDeclaration()) {
- // initial exec model
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+ // Initial exec model.
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV =
new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
@@ -1290,7 +1304,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
PseudoSourceValue::getConstantPool(), 0);
Chain = Offset.getValue(1);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
@@ -1355,6 +1369,9 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = 0;
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
@@ -1363,6 +1380,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
if (RelocM == Reloc::Static)
CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
else {
+ ARMPCLabelIndex = AFI->createConstPoolEntryUId();
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
ARMConstantPoolValue *CPV =
new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
@@ -1375,7 +1393,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
@@ -1390,6 +1408,9 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
SelectionDAG &DAG){
assert(Subtarget->isTargetELF() &&
"GLOBAL OFFSET TABLE not implemented for non-ELF targets");
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
@@ -1400,7 +1421,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
PseudoSourceValue::getConstantPool(), 0);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
@@ -1416,6 +1437,8 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
}
case Intrinsic::eh_sjlj_lsda: {
MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
@@ -1433,7 +1456,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
return Result;
@@ -1522,7 +1545,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
if (NextVA.isMemLoc()) {
unsigned ArgSize = NextVA.getLocVT().getSizeInBits()/8;
MachineFrameInfo *MFI = MF.getFrameInfo();
- int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset());
+ int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset(),
+ true, false);
// Create load node to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
@@ -1533,7 +1557,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
}
- return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, ArgValue, ArgValue2);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
}
SDValue
@@ -1636,7 +1660,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
- int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset());
+ int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
+ true, false);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
@@ -1664,7 +1689,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// the result of va_next.
AFI->setVarArgsRegSaveSize(VARegSaveSize);
VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset +
- VARegSaveSize - VARegSize);
+ VARegSaveSize - VARegSize,
+ true, false);
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
SmallVector<SDValue, 4> MemOps;
@@ -1688,7 +1714,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
&MemOps[0], MemOps.size());
} else
// This will point to the next argument passed via stack.
- VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
+ VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset, true, false);
}
return Chain;
@@ -1710,46 +1736,41 @@ static bool isFloatingPointZero(SDValue Op) {
return false;
}
-static bool isLegalCmpImmediate(unsigned C, bool isThumb1Only) {
- return ( isThumb1Only && (C & ~255U) == 0) ||
- (!isThumb1Only && ARM_AM::getSOImmVal(C) != -1);
-}
-
/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
/// the given operands.
-static SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- SDValue &ARMCC, SelectionDAG &DAG, bool isThumb1Only,
- DebugLoc dl) {
+SDValue
+ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
unsigned C = RHSC->getZExtValue();
- if (!isLegalCmpImmediate(C, isThumb1Only)) {
+ if (!isLegalICmpImmediate(C)) {
// Constant does not fit, try adjusting it by one?
switch (CC) {
default: break;
case ISD::SETLT:
case ISD::SETGE:
- if (isLegalCmpImmediate(C-1, isThumb1Only)) {
+ if (isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
RHS = DAG.getConstant(C-1, MVT::i32);
}
break;
case ISD::SETULT:
case ISD::SETUGE:
- if (C > 0 && isLegalCmpImmediate(C-1, isThumb1Only)) {
+ if (C > 0 && isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
RHS = DAG.getConstant(C-1, MVT::i32);
}
break;
case ISD::SETLE:
case ISD::SETGT:
- if (isLegalCmpImmediate(C+1, isThumb1Only)) {
+ if (isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
RHS = DAG.getConstant(C+1, MVT::i32);
}
break;
case ISD::SETULE:
case ISD::SETUGT:
- if (C < 0xffffffff && isLegalCmpImmediate(C+1, isThumb1Only)) {
+ if (C < 0xffffffff && isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
RHS = DAG.getConstant(C+1, MVT::i32);
}
@@ -1785,8 +1806,7 @@ static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
}
-static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
+SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
@@ -1798,7 +1818,7 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
if (LHS.getValueType() == MVT::i32) {
SDValue ARMCC;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb1Only(), dl);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp);
}
@@ -1820,8 +1840,7 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
return Result;
}
-static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
+SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
@@ -1832,7 +1851,7 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
if (LHS.getValueType() == MVT::i32) {
SDValue ARMCC;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb1Only(), dl);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
Chain, Dest, ARMCC, CCR,Cmp);
}
@@ -2049,16 +2068,16 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
SDValue Op = N->getOperand(0);
DebugLoc dl = N->getDebugLoc();
if (N->getValueType(0) == MVT::f64) {
- // Turn i64->f64 into FMDRR.
+ // Turn i64->f64 into VMOVDRR.
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(0, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(1, MVT::i32));
- return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
}
- // Turn f64->i64 into FMRRD.
- SDValue Cvt = DAG.getNode(ARMISD::FMRRD, dl,
+ // Turn f64->i64 into VMOVRRD.
+ SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
// Merge the pieces into a single i64 value.
@@ -2115,8 +2134,7 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
/// i32 values and take a 2 x i32 value to shift plus a shift amount.
-static SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
+SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
@@ -2140,7 +2158,7 @@ static SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
- ARMCC, DAG, ST->isThumb1Only(), dl);
+ ARMCC, DAG, dl);
SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC,
CCR, Cmp);
@@ -2151,8 +2169,7 @@ static SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
/// i32 values and take a 2 x i32 value to shift plus a shift amount.
-static SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
+SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
@@ -2174,7 +2191,7 @@ static SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG,
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
- ARMCC, DAG, ST->isThumb1Only(), dl);
+ ARMCC, DAG, dl);
SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC,
CCR, Cmp);
@@ -2860,8 +2877,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
LowerGlobalAddressELF(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget);
- case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex);
@@ -2878,9 +2895,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::SHL:
case ISD::SRL:
case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
- case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG, Subtarget);
+ case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
case ISD::SRL_PARTS:
- case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG, Subtarget);
+ case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
case ISD::VSETCC: return LowerVSETCC(Op, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
@@ -3155,12 +3172,12 @@ static SDValue PerformSUBCombine(SDNode *N,
return SDValue();
}
-/// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD.
-static SDValue PerformFMRRDCombine(SDNode *N,
+/// PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD.
+static SDValue PerformVMOVRRDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
// fmrrd(fmdrr x, y) -> x,y
SDValue InDouble = N->getOperand(0);
- if (InDouble.getOpcode() == ARMISD::FMDRR)
+ if (InDouble.getOpcode() == ARMISD::VMOVDRR)
return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
return SDValue();
}
@@ -3455,7 +3472,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
default: break;
case ISD::ADD: return PerformADDCombine(N, DCI);
case ISD::SUB: return PerformSUBCombine(N, DCI);
- case ARMISD::FMRRD: return PerformFMRRDCombine(N, DCI);
+ case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
@@ -3683,6 +3700,18 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
return true;
}
+/// isLegalICmpImmediate - Return true if the specified immediate is legal
+/// icmp immediate, that is the target has icmp instructions which can compare
+/// a register against the immediate without having to materialize the
+/// immediate into a register.
+bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ if (!Subtarget->isThumb())
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ if (Subtarget->isThumb2())
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ return Imm >= 0 && Imm <= 255;
+}
+
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
bool isSEXTLoad, SDValue &Base,
SDValue &Offset, bool &isInc,
@@ -3737,7 +3766,7 @@ static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
return true;
}
- // FIXME: Use FLDM / FSTM to emulate indexed FP load / store.
+ // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
return false;
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 9c7a91d..4f31f8a 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -62,8 +62,8 @@ namespace llvm {
SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
- FMRRD, // double to two gprs.
- FMDRR, // Two gprs to double.
+ VMOVRRD, // double to two gprs.
+ VMOVDRR, // Two gprs to double.
EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
@@ -180,6 +180,12 @@ namespace llvm {
virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
+ /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// icmp immediate, that is the target has icmp instructions which can compare
+ /// a register against the immediate without having to materialize the
+ /// immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
@@ -278,8 +284,12 @@ namespace llvm {
SelectionDAG &DAG);
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG);
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG);
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG);
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
SDValue Chain,
@@ -315,6 +325,9 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
DebugLoc dl, SelectionDAG &DAG);
+
+ SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl);
};
}
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 86bbe2a..87bb12b 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -80,22 +80,26 @@ bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
}
void ARMInstrInfo::
-reMaterialize(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SubIdx,
- const MachineInstr *Orig) const {
+reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const {
DebugLoc dl = Orig->getDebugLoc();
- if (Orig->getOpcode() == ARM::MOVi2pieces) {
+ unsigned Opcode = Orig->getOpcode();
+ switch (Opcode) {
+ default:
+ break;
+ case ARM::MOVi2pieces: {
RI.emitLoadConstPool(MBB, I, dl,
DestReg, SubIdx,
Orig->getOperand(1).getImm(),
(ARMCC::CondCodes)Orig->getOperand(2).getImm(),
Orig->getOperand(3).getReg());
+ MachineInstr *NewMI = prior(I);
+ NewMI->getOperand(0).setSubReg(SubIdx);
return;
}
+ }
- MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
- MI->getOperand(0).setReg(DestReg);
- MBB.insert(I, MI);
+ return ARMBaseInstrInfo::reMaterialize(MBB, I, DestReg, SubIdx, Orig, TRI);
}
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 5d1678d..4319577 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -35,15 +35,16 @@ public:
// Return true if the block does not fall through.
bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const;
+
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
const ARMRegisterInfo &getRegisterInfo() const { return RI; }
-
- void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SubIdx,
- const MachineInstr *Orig) const;
};
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index cbe80b4..3fe634e 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -340,9 +340,9 @@ def addrmode5 : Operand<i32>,
// addrmode6 := reg with optional writeback
//
def addrmode6 : Operand<i32>,
- ComplexPattern<i32, 3, "SelectAddrMode6", []> {
+ ComplexPattern<i32, 4, "SelectAddrMode6", []> {
let PrintMethod = "printAddrMode6Operand";
- let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm);
+ let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm, i32imm);
}
// addrmodepc := pc + reg
@@ -377,15 +377,13 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
IIC_iALUr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
let isCommutable = Commutable;
}
def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
IIC_iALUsr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{25} = 0;
}
}
@@ -396,24 +394,22 @@ let Defs = [CPSR] in {
multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
def ri : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, opc, "s\t$dst, $a, $b",
+ IIC_iALUi, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> {
let Inst{20} = 1;
let Inst{25} = 1;
}
def rr : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
- IIC_iALUr, opc, "s\t$dst, $a, $b",
+ IIC_iALUr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
let isCommutable = Commutable;
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{20} = 1;
let Inst{25} = 0;
}
def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, opc, "s\t$dst, $a, $b",
+ IIC_iALUsr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{20} = 1;
let Inst{25} = 0;
}
@@ -435,7 +431,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm, IIC_iCMPr,
opc, "\t$a, $b",
[(opnode GPR:$a, GPR:$b)]> {
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{20} = 1;
let Inst{25} = 0;
let isCommutable = Commutable;
@@ -443,8 +439,6 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iCMPsr,
opc, "\t$a, $b",
[(opnode GPR:$a, so_reg:$b)]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{20} = 1;
let Inst{25} = 0;
}
@@ -501,20 +495,22 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
Requires<[IsARM, CarryDefIsUnused]> {
let isCommutable = Commutable;
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
}
def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
Requires<[IsARM, CarryDefIsUnused]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{25} = 0;
}
- // Carry setting variants
+}
+// Carry setting variants
+let Defs = [CPSR] in {
+multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
+ bit Commutable = 0> {
def Sri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
- DPFrm, IIC_iALUi, !strconcat(opc, "s\t$dst, $a, $b"),
+ DPFrm, IIC_iALUi, !strconcat(opc, "\t$dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
Requires<[IsARM, CarryDefIsUsed]> {
let Defs = [CPSR];
@@ -522,26 +518,25 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{25} = 1;
}
def Srr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- DPFrm, IIC_iALUr, !strconcat(opc, "s\t$dst, $a, $b"),
+ DPFrm, IIC_iALUr, !strconcat(opc, "\t$dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
Requires<[IsARM, CarryDefIsUsed]> {
let Defs = [CPSR];
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{20} = 1;
let Inst{25} = 0;
}
def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
- DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "s\t$dst, $a, $b"),
+ DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
Requires<[IsARM, CarryDefIsUsed]> {
let Defs = [CPSR];
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{20} = 1;
let Inst{25} = 0;
}
}
}
+}
//===----------------------------------------------------------------------===//
// Instructions
@@ -652,6 +647,7 @@ def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
let isReturn = 1, isTerminator = 1, isBarrier = 1 in
def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
"bx", "\tlr", [(ARMretflag)]> {
+ let Inst{3-0} = 0b1110;
let Inst{7-4} = 0b0001;
let Inst{19-8} = 0b111111111111;
let Inst{27-20} = 0b00010010;
@@ -664,6 +660,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
let Inst{7-4} = 0b0001;
let Inst{19-8} = 0b111111111111;
let Inst{27-20} = 0b00010010;
+ let Inst{31-28} = 0b1110;
}
}
@@ -673,7 +670,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
hasExtraDefRegAllocReq = 1 in
def LDM_RET : AXI4ld<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
- LdStMulFrm, IIC_Br, "ldm${p}${addr:submode}\t$addr, $wb",
+ LdStMulFrm, IIC_Br, "ldm${addr:submode}${p}\t$addr, $wb",
[]>;
// On non-Darwin platforms R9 is callee-saved.
@@ -762,6 +759,7 @@ let isBranch = 1, isTerminator = 1 in {
def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
IIC_Br, "mov\tpc, $target \n$jt",
[(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> {
+ let Inst{15-12} = 0b1111;
let Inst{20} = 0; // S Bit
let Inst{24-21} = 0b1101;
let Inst{27-25} = 0b000;
@@ -771,6 +769,7 @@ let isBranch = 1, isTerminator = 1 in {
IIC_Br, "ldr\tpc, $target \n$jt",
[(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
imm:$id)]> {
+ let Inst{15-12} = 0b1111;
let Inst{20} = 1; // L bit
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
@@ -782,6 +781,7 @@ let isBranch = 1, isTerminator = 1 in {
IIC_Br, "add\tpc, $target, $idx \n$jt",
[(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
imm:$id)]> {
+ let Inst{15-12} = 0b1111;
let Inst{20} = 0; // S bit
let Inst{24-21} = 0b0100;
let Inst{27-25} = 0b000;
@@ -813,26 +813,26 @@ def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
// Loads with zero extension
def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldr", "h\t$dst, $addr",
+ IIC_iLoadr, "ldrh", "\t$dst, $addr",
[(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>;
def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
- IIC_iLoadr, "ldr", "b\t$dst, $addr",
+ IIC_iLoadr, "ldrb", "\t$dst, $addr",
[(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>;
// Loads with sign extension
def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldr", "sh\t$dst, $addr",
+ IIC_iLoadr, "ldrsh", "\t$dst, $addr",
[(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>;
def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldr", "sb\t$dst, $addr",
+ IIC_iLoadr, "ldrsb", "\t$dst, $addr",
[(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldr", "d\t$dst1, $addr",
+ IIC_iLoadr, "ldrd", "\t$dst1, $addr",
[]>, Requires<[IsARM, HasV5TE]>;
// Indexed loads
@@ -846,35 +846,35 @@ def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb),
def LDRH_PRE : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb),
(ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
- "ldr", "h\t$dst, $addr!", "$addr.base = $base_wb", []>;
+ "ldrh", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
- "ldr", "h\t$dst, [$base], $offset", "$base = $base_wb", []>;
+ "ldrh", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
def LDRB_PRE : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb),
(ins addrmode2:$addr), LdFrm, IIC_iLoadru,
- "ldr", "b\t$dst, $addr!", "$addr.base = $base_wb", []>;
+ "ldrb", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru,
- "ldr", "b\t$dst, [$base], $offset", "$base = $base_wb", []>;
+ "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb),
(ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
- "ldr", "sh\t$dst, $addr!", "$addr.base = $base_wb", []>;
+ "ldrsh", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
- "ldr", "sh\t$dst, [$base], $offset", "$base = $base_wb", []>;
+ "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb),
(ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
- "ldr", "sb\t$dst, $addr!", "$addr.base = $base_wb", []>;
+ "ldrsb", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
- "ldr", "sb\t$dst, [$base], $offset", "$base = $base_wb", []>;
+ "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
}
// Store
@@ -884,18 +884,18 @@ def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
// Stores with truncate
def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, IIC_iStorer,
- "str", "h\t$src, $addr",
+ "strh", "\t$src, $addr",
[(truncstorei16 GPR:$src, addrmode3:$addr)]>;
def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
- "str", "b\t$src, $addr",
+ "strb", "\t$src, $addr",
[(truncstorei8 GPR:$src, addrmode2:$addr)]>;
// Store doubleword
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
StMiscFrm, IIC_iStorer,
- "str", "d\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>;
+ "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>;
// Indexed stores
def STR_PRE : AI2stwpr<(outs GPR:$base_wb),
@@ -915,28 +915,28 @@ def STR_POST : AI2stwpo<(outs GPR:$base_wb),
def STRH_PRE : AI3sthpr<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am3offset:$offset),
StMiscFrm, IIC_iStoreru,
- "str", "h\t$src, [$base, $offset]!", "$base = $base_wb",
+ "strh", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>;
def STRH_POST: AI3sthpo<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am3offset:$offset),
StMiscFrm, IIC_iStoreru,
- "str", "h\t$src, [$base], $offset", "$base = $base_wb",
+ "strh", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb, (post_truncsti16 GPR:$src,
GPR:$base, am3offset:$offset))]>;
def STRB_PRE : AI2stbpr<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am2offset:$offset),
StFrm, IIC_iStoreru,
- "str", "b\t$src, [$base, $offset]!", "$base = $base_wb",
+ "strb", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb, (pre_truncsti8 GPR:$src,
GPR:$base, am2offset:$offset))]>;
def STRB_POST: AI2stbpo<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am2offset:$offset),
StFrm, IIC_iStoreru,
- "str", "b\t$src, [$base], $offset", "$base = $base_wb",
+ "strb", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb, (post_truncsti8 GPR:$src,
GPR:$base, am2offset:$offset))]>;
@@ -947,13 +947,13 @@ def STRB_POST: AI2stbpo<(outs GPR:$base_wb),
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
def LDM : AXI4ld<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
- LdStMulFrm, IIC_iLoadm, "ldm${p}${addr:submode}\t$addr, $wb",
+ LdStMulFrm, IIC_iLoadm, "ldm${addr:submode}${p}\t$addr, $wb",
[]>;
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
def STM : AXI4st<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
- LdStMulFrm, IIC_iStorem, "stm${p}${addr:submode}\t$addr, $wb",
+ LdStMulFrm, IIC_iStorem, "stm${addr:submode}${p}\t$addr, $wb",
[]>;
//===----------------------------------------------------------------------===//
@@ -963,15 +963,13 @@ def STM : AXI4st<(outs),
let neverHasSideEffects = 1 in
def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
"mov", "\t$dst, $src", []>, UnaryDP {
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
}
def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src),
DPSoRegFrm, IIC_iMOVsr,
"mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{25} = 0;
}
@@ -1016,10 +1014,10 @@ def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi,
let Defs = [CPSR] in {
def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
- IIC_iMOVsi, "mov", "s\t$dst, $src, lsr #1",
+ IIC_iMOVsi, "movs", "\t$dst, $src, lsr #1",
[(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP;
def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
- IIC_iMOVsi, "mov", "s\t$dst, $src, asr #1",
+ IIC_iMOVsi, "movs", "\t$dst, $src, asr #1",
[(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP;
}
@@ -1095,15 +1093,19 @@ defm SUB : AsI1_bin_irs<0b0010, "sub",
BinOpFrag<(sub node:$LHS, node:$RHS)>>;
// ADD and SUB with 's' bit set.
-defm ADDS : AI1_bin_s_irs<0b0100, "add",
- BinOpFrag<(addc node:$LHS, node:$RHS)>>;
-defm SUBS : AI1_bin_s_irs<0b0010, "sub",
+defm ADDS : AI1_bin_s_irs<0b0100, "adds",
+ BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
+defm SUBS : AI1_bin_s_irs<0b0010, "subs",
BinOpFrag<(subc node:$LHS, node:$RHS)>>;
defm ADC : AI1_adde_sube_irs<0b0101, "adc",
BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>;
defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs",
+ BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>;
+defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs",
+ BinOpFrag<(sube node:$LHS, node:$RHS)>>;
// These don't define reg/reg forms, because they are handled above.
def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
@@ -1115,24 +1117,20 @@ def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
IIC_iALUsr, "rsb", "\t$dst, $a, $b",
[(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{25} = 0;
}
// RSB with 's' bit set.
let Defs = [CPSR] in {
def RSBSri : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, "rsb", "s\t$dst, $a, $b",
+ IIC_iALUi, "rsbs", "\t$dst, $a, $b",
[(set GPR:$dst, (subc so_imm:$b, GPR:$a))]> {
let Inst{20} = 1;
let Inst{25} = 1;
}
def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, "rsb", "s\t$dst, $a, $b",
+ IIC_iALUsr, "rsbs", "\t$dst, $a, $b",
[(set GPR:$dst, (subc so_reg:$b, GPR:$a))]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{20} = 1;
let Inst{25} = 0;
}
@@ -1149,8 +1147,6 @@ def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b",
[(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>,
Requires<[IsARM, CarryDefIsUnused]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{25} = 0;
}
}
@@ -1168,8 +1164,6 @@ def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, "rscs\t$dst, $a, $b",
[(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>,
Requires<[IsARM, CarryDefIsUnused]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{20} = 1;
let Inst{25} = 0;
}
@@ -1216,14 +1210,11 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
"mvn", "\t$dst, $src",
[(set GPR:$dst, (not GPR:$src))]>, UnaryDP {
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
}
def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
IIC_iMOVsr, "mvn", "\t$dst, $src",
- [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP {
- let Inst{4} = 1;
- let Inst{7} = 0;
-}
+ [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP;
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm,
IIC_iMOVi, "mvn", "\t$dst, $imm",
@@ -1536,7 +1527,7 @@ def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
IIC_iCMOVr, "mov", "\t$dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst">, UnaryDP {
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
}
@@ -1545,8 +1536,6 @@ def MOVCCs : AI1<0b1101, (outs GPR:$dst),
"mov", "\t$dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst">, UnaryDP {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{25} = 0;
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 25c4acd..e1353b7 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -102,6 +102,19 @@ def addrmode_neonldstm : Operand<i32>,
}
*/
+def h8imm : Operand<i8> {
+ let PrintMethod = "printHex8ImmOperand";
+}
+def h16imm : Operand<i16> {
+ let PrintMethod = "printHex16ImmOperand";
+}
+def h32imm : Operand<i32> {
+ let PrintMethod = "printHex32ImmOperand";
+}
+def h64imm : Operand<i64> {
+ let PrintMethod = "printHex64ImmOperand";
+}
+
//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//
@@ -133,7 +146,7 @@ def VLDMS : NI<(outs),
// Use vldmia to load a Q register as a D register pair.
def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
IIC_fpLoadm,
- "vldmia $addr, ${dst:dregpair}",
+ "vldmia\t$addr, ${dst:dregpair}",
[(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> {
let Inst{27-25} = 0b110;
let Inst{24} = 0; // P bit
@@ -145,7 +158,7 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
// Use vstmia to store a Q register as a D register pair.
def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr),
IIC_fpStorem,
- "vstmia $addr, ${src:dregpair}",
+ "vstmia\t$addr, ${src:dregpair}",
[(store (v2f64 QPR:$src), addrmode4:$addr)]> {
let Inst{27-25} = 0b110;
let Inst{24} = 0; // P bit
@@ -2282,7 +2295,7 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
// VMOV : Vector Move (Register)
-def VMOVD : N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
+def VMOVDneon: N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
IIC_VMOVD, "vmov\t$dst, $src", "", []>;
def VMOVQ : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
IIC_VMOVD, "vmov\t$dst, $src", "", []>;
@@ -2325,38 +2338,38 @@ def vmovImm64 : PatLeaf<(build_vector), [{
// be encoded based on the immed values.
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
- (ins i8imm:$SIMM), IIC_VMOVImm,
+ (ins h8imm:$SIMM), IIC_VMOVImm,
"vmov.i8\t$dst, $SIMM", "",
[(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
- (ins i8imm:$SIMM), IIC_VMOVImm,
+ (ins h8imm:$SIMM), IIC_VMOVImm,
"vmov.i8\t$dst, $SIMM", "",
[(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;
def VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1, (outs DPR:$dst),
- (ins i16imm:$SIMM), IIC_VMOVImm,
+ (ins h16imm:$SIMM), IIC_VMOVImm,
"vmov.i16\t$dst, $SIMM", "",
[(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
def VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1, (outs QPR:$dst),
- (ins i16imm:$SIMM), IIC_VMOVImm,
+ (ins h16imm:$SIMM), IIC_VMOVImm,
"vmov.i16\t$dst, $SIMM", "",
[(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;
def VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1, (outs DPR:$dst),
- (ins i32imm:$SIMM), IIC_VMOVImm,
+ (ins h32imm:$SIMM), IIC_VMOVImm,
"vmov.i32\t$dst, $SIMM", "",
[(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
def VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1, (outs QPR:$dst),
- (ins i32imm:$SIMM), IIC_VMOVImm,
+ (ins h32imm:$SIMM), IIC_VMOVImm,
"vmov.i32\t$dst, $SIMM", "",
[(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;
def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
- (ins i64imm:$SIMM), IIC_VMOVImm,
+ (ins h64imm:$SIMM), IIC_VMOVImm,
"vmov.i64\t$dst, $SIMM", "",
[(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
- (ins i64imm:$SIMM), IIC_VMOVImm,
+ (ins h64imm:$SIMM), IIC_VMOVImm,
"vmov.i64\t$dst, $SIMM", "",
[(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 5d02925..2796364 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -740,3 +740,13 @@ def : T1Pat<(i32 thumb_immshifted:$src),
def : T1Pat<(i32 imm0_255_comp:$src),
(tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
+
+// Pseudo instruction that combines ldr from constpool and add pc. This should
+// be expanded into two instructions late to allow if-conversion and
+// scheduling.
+let isReMaterializable = 1 in
+def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+ NoItinerary, "@ ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ imm:$cp))]>,
+ Requires<[IsThumb1Only]>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 5bfda37..1bb9bfd 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1179,3 +1179,13 @@ let isReMaterializable = 1 in
def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
"movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
[(set GPR:$dst, (i32 imm:$src))]>;
+
+// Pseudo instruction that combines ldr from constpool and add pc. This should
+// be expanded into two instructions late to allow if-conversion and
+// scheduling.
+let isReMaterializable = 1 in
+def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+ NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ imm:$cp))]>,
+ Requires<[IsThumb2]>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 455c33b..ba341f4 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -17,7 +17,7 @@ def SDT_ITOF :
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
def SDT_CMPFP0 :
SDTypeProfile<0, 1, [SDTCisFP<0>]>;
-def SDT_FMDRR :
+def SDT_VMOVDRR :
SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>]>;
@@ -28,7 +28,7 @@ def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>;
def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>;
-def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>;
+def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
//===----------------------------------------------------------------------===//
// Operand Definitions.
@@ -55,21 +55,21 @@ def vfp_f64imm : Operand<f64>,
//
let canFoldAsLoad = 1 in {
-def FLDD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr),
- IIC_fpLoad64, "fldd", "\t$dst, $addr",
+def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr),
+ IIC_fpLoad64, "vldr", ".64\t$dst, $addr",
[(set DPR:$dst, (load addrmode5:$addr))]>;
-def FLDS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr),
- IIC_fpLoad32, "flds", "\t$dst, $addr",
+def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr),
+ IIC_fpLoad32, "vldr", ".32\t$dst, $addr",
[(set SPR:$dst, (load addrmode5:$addr))]>;
} // canFoldAsLoad
-def FSTD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr),
- IIC_fpStore64, "fstd", "\t$src, $addr",
+def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr),
+ IIC_fpStore64, "vstr", ".64\t$src, $addr",
[(store DPR:$src, addrmode5:$addr)]>;
-def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
- IIC_fpStore32, "fsts", "\t$src, $addr",
+def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
+ IIC_fpStore32, "vstr", ".32\t$src, $addr",
[(store SPR:$src, addrmode5:$addr)]>;
//===----------------------------------------------------------------------===//
@@ -77,32 +77,32 @@ def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
//
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
-def FLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
+def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
variable_ops), IIC_fpLoadm,
- "fldm${addr:submode}d${p}\t${addr:base}, $wb",
+ "vldm${addr:submode}${p}\t${addr:base}, $wb",
[]> {
let Inst{20} = 1;
}
-def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
+def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
variable_ops), IIC_fpLoadm,
- "fldm${addr:submode}s${p}\t${addr:base}, $wb",
+ "vldm${addr:submode}${p}\t${addr:base}, $wb",
[]> {
let Inst{20} = 1;
}
} // mayLoad, hasExtraDefRegAllocReq
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
-def FSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
+def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
variable_ops), IIC_fpStorem,
- "fstm${addr:submode}d${p}\t${addr:base}, $wb",
+ "vstm${addr:submode}${p}\t${addr:base}, $wb",
[]> {
let Inst{20} = 0;
}
-def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
+def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
variable_ops), IIC_fpStorem,
- "fstm${addr:submode}s${p}\t${addr:base}, $wb",
+ "vstm${addr:submode}${p}\t${addr:base}, $wb",
[]> {
let Inst{20} = 0;
}
@@ -114,68 +114,68 @@ def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
// FP Binary Operations.
//
-def FADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpALU64, "faddd", "\t$dst, $a, $b",
+def VADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ IIC_fpALU64, "vadd", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>;
-def FADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpALU32, "fadds", "\t$dst, $a, $b",
+def VADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ IIC_fpALU32, "vadd", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
// These are encoded as unary instructions.
let Defs = [FPSCR] in {
-def FCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b),
- IIC_fpCMP64, "fcmped", "\t$a, $b",
+def VCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b),
+ IIC_fpCMP64, "vcmpe", ".f64\t$a, $b",
[(arm_cmpfp DPR:$a, DPR:$b)]>;
-def FCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b),
- IIC_fpCMP32, "fcmpes", "\t$a, $b",
+def VCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b),
+ IIC_fpCMP32, "vcmpe", ".f32\t$a, $b",
[(arm_cmpfp SPR:$a, SPR:$b)]>;
}
-def FDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpDIV64, "fdivd", "\t$dst, $a, $b",
+def VDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ IIC_fpDIV64, "vdiv", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>;
-def FDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpDIV32, "fdivs", "\t$dst, $a, $b",
+def VDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ IIC_fpDIV32, "vdiv", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>;
-def FMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpMUL64, "fmuld", "\t$dst, $a, $b",
+def VMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ IIC_fpMUL64, "vmul", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>;
-def FMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpMUL32, "fmuls", "\t$dst, $a, $b",
+def VMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ IIC_fpMUL32, "vmul", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
-
-def FNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpMUL64, "fnmuld", "\t$dst, $a, $b",
+
+def VNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ IIC_fpMUL64, "vnmul", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]> {
let Inst{6} = 1;
}
-def FNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpMUL32, "fnmuls", "\t$dst, $a, $b",
+def VNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ IIC_fpMUL32, "vnmul", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]> {
let Inst{6} = 1;
}
// Match reassociated forms only if not sign dependent rounding.
def : Pat<(fmul (fneg DPR:$a), DPR:$b),
- (FNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+ (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
def : Pat<(fmul (fneg SPR:$a), SPR:$b),
- (FNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+ (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
-def FSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpALU64, "fsubd", "\t$dst, $a, $b",
+def VSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ IIC_fpALU64, "vsub", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fsub DPR:$a, DPR:$b))]> {
let Inst{6} = 1;
}
-def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpALU32, "fsubs", "\t$dst, $a, $b",
+def VSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ IIC_fpALU32, "vsub", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> {
let Inst{6} = 1;
}
@@ -184,31 +184,31 @@ def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
// FP Unary Operations.
//
-def FABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpUNA64, "fabsd", "\t$dst, $a",
+def VABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a),
+ IIC_fpUNA64, "vabs", ".f64\t$dst, $a",
[(set DPR:$dst, (fabs DPR:$a))]>;
-def FABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpUNA32, "fabss", "\t$dst, $a",
+def VABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a),
+ IIC_fpUNA32, "vabs", ".f32\t$dst, $a",
[(set SPR:$dst, (fabs SPR:$a))]>;
let Defs = [FPSCR] in {
-def FCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a),
- IIC_fpCMP64, "fcmpezd", "\t$a",
+def VCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a),
+ IIC_fpCMP64, "vcmpe", ".f64\t$a, #0",
[(arm_cmpfp0 DPR:$a)]>;
-def FCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a),
- IIC_fpCMP32, "fcmpezs", "\t$a",
+def VCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a),
+ IIC_fpCMP32, "vcmpe", ".f32\t$a, #0",
[(arm_cmpfp0 SPR:$a)]>;
}
-def FCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a),
- IIC_fpCVTDS, "fcvtds", "\t$dst, $a",
+def VCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a),
+ IIC_fpCVTDS, "vcvt", ".f64.f32\t$dst, $a",
[(set DPR:$dst, (fextend SPR:$a))]>;
// Special case encoding: bits 11-8 is 0b1011.
-def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
- IIC_fpCVTSD, "fcvtsd", "\t$dst, $a",
+def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
+ IIC_fpCVTSD, "vcvt", ".f32.f64\t$dst, $a",
[(set SPR:$dst, (fround DPR:$a))]> {
let Inst{27-23} = 0b11101;
let Inst{21-16} = 0b110111;
@@ -217,52 +217,52 @@ def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
}
let neverHasSideEffects = 1 in {
-def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpUNA64, "fcpyd", "\t$dst, $a", []>;
+def VMOVD: ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a),
+ IIC_fpUNA64, "vmov", ".f64\t$dst, $a", []>;
-def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpUNA32, "fcpys", "\t$dst, $a", []>;
+def VMOVS: ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a),
+ IIC_fpUNA32, "vmov", ".f32\t$dst, $a", []>;
} // neverHasSideEffects
-def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpUNA64, "fnegd", "\t$dst, $a",
+def VNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a),
+ IIC_fpUNA64, "vneg", ".f64\t$dst, $a",
[(set DPR:$dst, (fneg DPR:$a))]>;
-def FNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpUNA32, "fnegs", "\t$dst, $a",
+def VNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a),
+ IIC_fpUNA32, "vneg", ".f32\t$dst, $a",
[(set SPR:$dst, (fneg SPR:$a))]>;
-def FSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpSQRT64, "fsqrtd", "\t$dst, $a",
+def VSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a),
+ IIC_fpSQRT64, "vsqrt", ".f64\t$dst, $a",
[(set DPR:$dst, (fsqrt DPR:$a))]>;
-def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpSQRT32, "fsqrts", "\t$dst, $a",
+def VSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a),
+ IIC_fpSQRT32, "vsqrt", ".f32\t$dst, $a",
[(set SPR:$dst, (fsqrt SPR:$a))]>;
//===----------------------------------------------------------------------===//
// FP <-> GPR Copies. Int <-> FP Conversions.
//
-def FMRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src),
- IIC_VMOVSI, "fmrs", "\t$dst, $src",
+def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src),
+ IIC_VMOVSI, "vmov", "\t$dst, $src",
[(set GPR:$dst, (bitconvert SPR:$src))]>;
-def FMSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
- IIC_VMOVIS, "fmsr", "\t$dst, $src",
+def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
+ IIC_VMOVIS, "vmov", "\t$dst, $src",
[(set SPR:$dst, (bitconvert GPR:$src))]>;
-def FMRRD : AVConv3I<0b11000101, 0b1011,
+def VMOVRRD : AVConv3I<0b11000101, 0b1011,
(outs GPR:$wb, GPR:$dst2), (ins DPR:$src),
- IIC_VMOVDI, "fmrrd", "\t$wb, $dst2, $src",
+ IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src",
[/* FIXME: Can't write pattern for multiple result instr*/]>;
// FMDHR: GPR -> SPR
// FMDLR: GPR -> SPR
-def FMDRR : AVConv5I<0b11000100, 0b1011,
+def VMOVDRR : AVConv5I<0b11000100, 0b1011,
(outs DPR:$dst), (ins GPR:$src1, GPR:$src2),
- IIC_VMOVID, "fmdrr", "\t$dst, $src1, $src2",
+ IIC_VMOVID, "vmov", "\t$dst, $src1, $src2",
[(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>;
// FMRDH: SPR -> GPR
@@ -277,53 +277,53 @@ def FMDRR : AVConv5I<0b11000100, 0b1011,
// Int to FP:
-def FSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
- IIC_fpCVTID, "fsitod", "\t$dst, $a",
+def VSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a",
[(set DPR:$dst, (arm_sitof SPR:$a))]> {
let Inst{7} = 1;
}
-def FSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a),
- IIC_fpCVTIS, "fsitos", "\t$dst, $a",
+def VSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a",
[(set SPR:$dst, (arm_sitof SPR:$a))]> {
let Inst{7} = 1;
}
-def FUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
- IIC_fpCVTID, "fuitod", "\t$dst, $a",
+def VUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a",
[(set DPR:$dst, (arm_uitof SPR:$a))]>;
-def FUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a),
- IIC_fpCVTIS, "fuitos", "\t$dst, $a",
+def VUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a",
[(set SPR:$dst, (arm_uitof SPR:$a))]>;
// FP to Int:
// Always set Z bit in the instruction, i.e. "round towards zero" variants.
-def FTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011,
+def VTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011,
(outs SPR:$dst), (ins DPR:$a),
- IIC_fpCVTDI, "ftosizd", "\t$dst, $a",
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a",
[(set SPR:$dst, (arm_ftosi DPR:$a))]> {
let Inst{7} = 1; // Z bit
}
-def FTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010,
+def VTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010,
(outs SPR:$dst), (ins SPR:$a),
- IIC_fpCVTSI, "ftosizs", "\t$dst, $a",
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a",
[(set SPR:$dst, (arm_ftosi SPR:$a))]> {
let Inst{7} = 1; // Z bit
}
-def FTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011,
+def VTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011,
(outs SPR:$dst), (ins DPR:$a),
- IIC_fpCVTDI, "ftouizd", "\t$dst, $a",
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a",
[(set SPR:$dst, (arm_ftoui DPR:$a))]> {
let Inst{7} = 1; // Z bit
}
-def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010,
+def VTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010,
(outs SPR:$dst), (ins SPR:$a),
- IIC_fpCVTSI, "ftouizs", "\t$dst, $a",
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a",
[(set SPR:$dst, (arm_ftoui SPR:$a))]> {
let Inst{7} = 1; // Z bit
}
@@ -332,54 +332,54 @@ def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010,
// FP FMA Operations.
//
-def FMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "fmacd", "\t$dst, $a, $b",
+def VMLAD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
-def FMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "fmacs", "\t$dst, $a, $b",
+def VMLAS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ IIC_fpMAC32, "vmla", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
-def FMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "fmscd", "\t$dst, $a, $b",
+def VNMLSD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
-def FMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "fmscs", "\t$dst, $a, $b",
+def VNMLSS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ IIC_fpMAC32, "vnmls", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
-def FNMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "fnmacd", "\t$dst, $a, $b",
+def VMLSD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
}
-def FNMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "fnmacs", "\t$dst, $a, $b",
+def VMLSS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ IIC_fpMAC32, "vmls", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
}
def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, DPR:$b)),
- (FNMACD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
+ (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)),
- (FNMACS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
+ (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
-def FNMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "fnmscd", "\t$dst, $a, $b",
+def VNMLAD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
}
-def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "fnmscs", "\t$dst, $a, $b",
+def VNMLAS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ IIC_fpMAC32, "vnmla", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
@@ -389,27 +389,27 @@ def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
// FP Conditional moves.
//
-def FCPYDcc : ADuI<0b11101011, 0b0000, 0b0100,
+def VMOVDcc : ADuI<0b11101011, 0b0000, 0b0100,
(outs DPR:$dst), (ins DPR:$false, DPR:$true),
- IIC_fpUNA64, "fcpyd", "\t$dst, $true",
+ IIC_fpUNA64, "vmov", ".f64\t$dst, $true",
[/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
-def FCPYScc : ASuI<0b11101011, 0b0000, 0b0100,
+def VMOVScc : ASuI<0b11101011, 0b0000, 0b0100,
(outs SPR:$dst), (ins SPR:$false, SPR:$true),
- IIC_fpUNA32, "fcpys", "\t$dst, $true",
+ IIC_fpUNA32, "vmov", ".f32\t$dst, $true",
[/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
-def FNEGDcc : ADuI<0b11101011, 0b0001, 0b0100,
+def VNEGDcc : ADuI<0b11101011, 0b0001, 0b0100,
(outs DPR:$dst), (ins DPR:$false, DPR:$true),
- IIC_fpUNA64, "fnegd", "\t$dst, $true",
+ IIC_fpUNA64, "vneg", ".f64\t$dst, $true",
[/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
-def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100,
+def VNEGScc : ASuI<0b11101011, 0b0001, 0b0100,
(outs SPR:$dst), (ins SPR:$false, SPR:$true),
- IIC_fpUNA32, "fnegs", "\t$dst, $true",
+ IIC_fpUNA32, "vneg", ".f32\t$dst, $true",
[/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
@@ -418,8 +418,11 @@ def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100,
// Misc.
//
+// APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags
+// to APSR.
let Defs = [CPSR], Uses = [FPSCR] in
-def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "",
+def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
+ "\tapsr_nzcv, fpscr",
[(arm_fmstat)]> {
let Inst{27-20} = 0b11101111;
let Inst{19-16} = 0b0001;
@@ -431,26 +434,26 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "",
// Materialize FP immediates. VFP3 only.
-let isReMaterializable = 1 in
-def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm),
+let isReMaterializable = 1 in {
+def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm),
VFPMiscFrm, IIC_VMOVImm,
- "fconsts", "\t$dst, $imm",
- [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
+ "fconstd", "\t$dst, $imm",
+ [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
let Inst{27-23} = 0b11101;
let Inst{21-20} = 0b11;
let Inst{11-9} = 0b101;
- let Inst{8} = 0;
+ let Inst{8} = 1;
let Inst{7-4} = 0b0000;
}
-let isReMaterializable = 1 in
-def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm),
+def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm),
VFPMiscFrm, IIC_VMOVImm,
- "fconstd", "\t$dst, $imm",
- [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
+ "fconsts", "\t$dst, $imm",
+ [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
let Inst{27-23} = 0b11101;
let Inst{21-20} = 0b11;
let Inst{11-9} = 0b101;
- let Inst{8} = 1;
+ let Inst{8} = 0;
let Inst{7-4} = 0b0000;
}
+}
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 7e1783b..304d0ef 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -41,8 +41,8 @@ using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
-STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
-STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
+STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
+STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
@@ -127,18 +127,18 @@ static int getLoadStoreMultipleOpcode(int Opcode) {
case ARM::t2STRi12:
NumSTMGened++;
return ARM::t2STM;
- case ARM::FLDS:
- NumFLDMGened++;
- return ARM::FLDMS;
- case ARM::FSTS:
- NumFSTMGened++;
- return ARM::FSTMS;
- case ARM::FLDD:
- NumFLDMGened++;
- return ARM::FLDMD;
- case ARM::FSTD:
- NumFSTMGened++;
- return ARM::FSTMD;
+ case ARM::VLDRS:
+ NumVLDMGened++;
+ return ARM::VLDMS;
+ case ARM::VSTRS:
+ NumVSTMGened++;
+ return ARM::VSTMS;
+ case ARM::VLDRD:
+ NumVLDMGened++;
+ return ARM::VLDMD;
+ case ARM::VSTRD:
+ NumVSTMGened++;
+ return ARM::VSTMD;
default: llvm_unreachable("Unhandled opcode!");
}
return 0;
@@ -229,8 +229,8 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
BaseKill = true; // New base is always killed right its use.
}
- bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
- bool isDef = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ bool isDPR = Opcode == ARM::VLDRD || Opcode == ARM::VSTRD;
+ bool isDef = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
Opcode = getLoadStoreMultipleOpcode(Opcode);
MachineInstrBuilder MIB = (isAM4)
? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
@@ -373,27 +373,27 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
case ARM::t2LDRi12:
case ARM::t2STRi8:
case ARM::t2STRi12:
- case ARM::FLDS:
- case ARM::FSTS:
+ case ARM::VLDRS:
+ case ARM::VSTRS:
return 4;
- case ARM::FLDD:
- case ARM::FSTD:
+ case ARM::VLDRD:
+ case ARM::VSTRD:
return 8;
case ARM::LDM:
case ARM::STM:
case ARM::t2LDM:
case ARM::t2STM:
return (MI->getNumOperands() - 5) * 4;
- case ARM::FLDMS:
- case ARM::FSTMS:
- case ARM::FLDMD:
- case ARM::FSTMD:
+ case ARM::VLDMS:
+ case ARM::VSTMS:
+ case ARM::VLDMD:
+ case ARM::VSTMD:
return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
}
}
/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
-/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
+/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
@@ -475,7 +475,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
}
}
} else {
- // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
+ // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
return false;
@@ -517,10 +517,10 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
switch (Opc) {
case ARM::LDR: return ARM::LDR_PRE;
case ARM::STR: return ARM::STR_PRE;
- case ARM::FLDS: return ARM::FLDMS;
- case ARM::FLDD: return ARM::FLDMD;
- case ARM::FSTS: return ARM::FSTMS;
- case ARM::FSTD: return ARM::FSTMD;
+ case ARM::VLDRS: return ARM::VLDMS;
+ case ARM::VLDRD: return ARM::VLDMD;
+ case ARM::VSTRS: return ARM::VSTMS;
+ case ARM::VSTRD: return ARM::VSTMD;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
return ARM::t2LDR_PRE;
@@ -536,10 +536,10 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
switch (Opc) {
case ARM::LDR: return ARM::LDR_POST;
case ARM::STR: return ARM::STR_POST;
- case ARM::FLDS: return ARM::FLDMS;
- case ARM::FLDD: return ARM::FLDMD;
- case ARM::FSTS: return ARM::FSTMS;
- case ARM::FSTD: return ARM::FSTMD;
+ case ARM::VLDRS: return ARM::VLDMS;
+ case ARM::VLDRD: return ARM::VLDMD;
+ case ARM::VSTRS: return ARM::VSTMS;
+ case ARM::VSTRD: return ARM::VSTMD;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
return ARM::t2LDR_POST;
@@ -564,8 +564,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
unsigned Bytes = getLSMultipleTransferSize(MI);
int Opcode = MI->getOpcode();
DebugLoc dl = MI->getDebugLoc();
- bool isAM5 = Opcode == ARM::FLDD || Opcode == ARM::FLDS ||
- Opcode == ARM::FSTD || Opcode == ARM::FSTS;
+ bool isAM5 = Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
+ Opcode == ARM::VSTRD || Opcode == ARM::VSTRS;
bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
return false;
@@ -575,7 +575,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (MI->getOperand(2).getImm() != 0)
return false;
- bool isLd = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
// Can't do the merge if the destination register is the same as the would-be
// writeback register.
if (isLd && MI->getOperand(0).getReg() == Base)
@@ -626,7 +626,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (!DoMerge)
return false;
- bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
+ bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
unsigned Offset = 0;
if (isAM5)
Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
@@ -638,7 +638,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
if (isLd) {
if (isAM5)
- // FLDMS, FLDMD
+ // VLDMS, VLDMD
BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
.addReg(Base, getKillRegState(BaseKill))
.addImm(Offset).addImm(Pred).addReg(PredReg)
@@ -657,7 +657,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
} else {
MachineOperand &MO = MI->getOperand(0);
if (isAM5)
- // FSTMS, FSTMD
+ // VSTMS, VSTMD
BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
.addImm(Pred).addReg(PredReg)
.addReg(Base, getDefRegState(true)) // WB base register
@@ -687,11 +687,11 @@ static bool isMemoryOp(const MachineInstr *MI) {
case ARM::LDR:
case ARM::STR:
return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
- case ARM::FLDS:
- case ARM::FSTS:
+ case ARM::VLDRS:
+ case ARM::VSTRS:
return MI->getOperand(1).isReg();
- case ARM::FLDD:
- case ARM::FSTD:
+ case ARM::VLDRD:
+ case ARM::VSTRD:
return MI->getOperand(1).isReg();
case ARM::t2LDRi8:
case ARM::t2LDRi12:
@@ -866,6 +866,13 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
Pred, PredReg, TII, isT2);
} else {
+ if (OddReg == EvenReg && EvenDeadKill) {
+ // If the two source operands are the same, the kill marker is probably
+ // on the first one. e.g.
+ // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
+ EvenDeadKill = false;
+ OddDeadKill = true;
+ }
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, EvenUndef,
BaseReg, false, BaseUndef, OffReg, false, OffUndef,
@@ -1214,7 +1221,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
if (!STI->hasV5TEOps())
return false;
- // FIXME: FLDS / FSTS -> FLDD / FSTD
+ // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
unsigned Scale = 1;
unsigned Opcode = Op0->getOpcode();
if (Opcode == ARM::LDR)
@@ -1456,7 +1463,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
continue;
int Opc = MI->getOpcode();
- bool isLd = isi32Load(Opc) || Opc == ARM::FLDS || Opc == ARM::FLDD;
+ bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
unsigned Base = MI->getOperand(1).getReg();
int Offset = getMemoryOpOffset(MI);
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 5af95c3..432ed78 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -16,6 +16,7 @@
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallVector.h"
using namespace llvm;
static cl::opt<bool>
@@ -108,6 +109,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
if (UseNEONFP.getPosition() == 0)
UseNEONForSinglePrecisionFP = true;
}
+ HasBranchTargetBuffer = (CPUString == "cortex-a8" ||
+ CPUString == "cortex-a9");
}
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
@@ -159,3 +162,13 @@ ARMSubtarget::GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const {
return false;
}
+
+bool ARMSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtarget::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(&ARM::GPRRegClass);
+ return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
+}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index e721a7f..3d0e01e 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -17,6 +17,7 @@
#include "llvm/Target/TargetInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtarget.h"
+#include "ARMBaseRegisterInfo.h"
#include <string>
namespace llvm {
@@ -49,6 +50,9 @@ protected:
/// determine if NEON should actually be used.
bool UseNEONForSinglePrecisionFP;
+ /// HasBranchTargetBuffer - True if processor can predict indirect branches.
+ bool HasBranchTargetBuffer;
+
/// IsThumb - True if we are in thumb mode, false if in ARM mode.
bool IsThumb;
@@ -122,17 +126,16 @@ protected:
bool isThumb2() const { return IsThumb && (ThumbMode == Thumb2); }
bool hasThumb2() const { return ThumbMode >= Thumb2; }
+ bool hasBranchTargetBuffer() const { return HasBranchTargetBuffer; }
+
bool isR9Reserved() const { return IsR9Reserved; }
const std::string & getCPUString() const { return CPUString; }
- /// enablePostRAScheduler - True at 'More' optimization except
- /// for Thumb1.
+ /// enablePostRAScheduler - True at 'More' optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtarget::AntiDepBreakMode& mode) const {
- mode = TargetSubtarget::ANTIDEP_CRITICAL;
- return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
- }
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
/// getInstrItins - Return the instruction itineraies based on subtarget
/// selection.
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index b4ce1d7..2564ed9 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -21,8 +21,7 @@
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-static const MCAsmInfo *createMCAsmInfo(const Target &T,
- const StringRef &TT) {
+static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
case Triple::Darwin:
@@ -61,8 +60,8 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
: ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
- std::string("e-p:32:32-f64:32:32-i64:32:32") :
- std::string("e-p:32:32-f64:64:64-i64:64:64")),
+ std::string("e-p:32:32-f64:32:32-i64:32:32-n32") :
+ std::string("e-p:32:32-f64:64:64-i64:64:64-n32")),
TLInfo(*this) {
}
@@ -74,9 +73,9 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:32-i64:32:32-"
- "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
+ "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
- "i16:16:32-i8:8:32-i1:8:32-a:0:32")),
+ "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32")),
TLInfo(*this) {
}
@@ -94,6 +93,10 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
if (Subtarget.hasNEON())
PM.add(createNEONPreAllocPass());
+ // Calculate and set max stack object alignment early, so we can decide
+ // whether we will need stack realignment (and thus FP).
+ PM.add(createARMMaxStackAlignmentCalculatorPass());
+
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass(true));
@@ -106,6 +109,10 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass());
+ // Expand some pseudo instructions into multiple instructions to allow
+ // proper scheduling.
+ PM.add(createARMExpandPseudoPass());
+
return true;
}
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 6cb3e9e4..0352503 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -43,6 +43,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -138,6 +139,19 @@ namespace {
void printVFPf32ImmOperand(const MachineInstr *MI, int OpNum);
void printVFPf64ImmOperand(const MachineInstr *MI, int OpNum);
+ void printHex8ImmOperand(const MachineInstr *MI, int OpNum) {
+ O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff);
+ }
+ void printHex16ImmOperand(const MachineInstr *MI, int OpNum) {
+ O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff);
+ }
+ void printHex32ImmOperand(const MachineInstr *MI, int OpNum) {
+ O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff);
+ }
+ void printHex64ImmOperand(const MachineInstr *MI, int OpNum) {
+ O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm());
+ }
+
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode);
virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
@@ -199,7 +213,7 @@ namespace {
if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")";
if (ACPV->getPCAdjustment() != 0) {
O << "-(" << MAI->getPrivateGlobalPrefix() << "PC"
- << ACPV->getLabelId()
+ << getFunctionNumber() << "_" << ACPV->getLabelId()
<< "+" << (unsigned)ACPV->getPCAdjustment();
if (ACPV->mustAddCurrentAddress())
O << "-.";
@@ -333,6 +347,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
&ARM::DPR_VFP2RegClass);
O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']';
} else {
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
O << getRegisterName(Reg);
}
break;
@@ -594,12 +609,7 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
if (Modifier && strcmp(Modifier, "submode") == 0) {
ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
- if (MO1.getReg() == ARM::SP) {
- bool isFLDM = (MI->getOpcode() == ARM::FLDMD ||
- MI->getOpcode() == ARM::FLDMS);
- O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM);
- } else
- O << ARM_AM::getAMSubModeStr(Mode);
+ O << ARM_AM::getAMSubModeStr(Mode);
return;
} else if (Modifier && strcmp(Modifier, "base") == 0) {
// Used for FSTM{D|S} and LSTM{D|S} operations.
@@ -623,9 +633,14 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) {
const MachineOperand &MO1 = MI->getOperand(Op);
const MachineOperand &MO2 = MI->getOperand(Op+1);
const MachineOperand &MO3 = MI->getOperand(Op+2);
+ const MachineOperand &MO4 = MI->getOperand(Op+3);
- // FIXME: No support yet for specifying alignment.
- O << "[" << getRegisterName(MO1.getReg()) << "]";
+ O << "[" << getRegisterName(MO1.getReg());
+ if (MO4.getImm()) {
+ // FIXME: Both darwin as and GNU as violate ARM docs here.
+ O << ", :" << MO4.getImm();
+ }
+ O << "]";
if (ARM_AM::getAM6WBFlag(MO3.getImm())) {
if (MO2.getReg() == 0)
@@ -697,11 +712,8 @@ ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op,
O << "[" << getRegisterName(MO1.getReg());
if (MO3.getReg())
O << ", " << getRegisterName(MO3.getReg());
- else if (unsigned ImmOffs = MO2.getImm()) {
- O << ", #" << ImmOffs;
- if (Scale > 1)
- O << " * " << Scale;
- }
+ else if (unsigned ImmOffs = MO2.getImm())
+ O << ", #" << ImmOffs * Scale;
O << "]";
}
@@ -844,7 +856,8 @@ void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int OpNum){
void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int OpNum) {
int Id = (int)MI->getOperand(OpNum).getImm();
- O << MAI->getPrivateGlobalPrefix() << "PC" << Id;
+ O << MAI->getPrivateGlobalPrefix()
+ << "PC" << getFunctionNumber() << "_" << Id;
}
void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int OpNum) {
@@ -1070,7 +1083,7 @@ void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
printInstruction(MI);
}
- if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*MI);
O << '\n';
processDebugLoc(MI, false);
@@ -1107,9 +1120,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
}
}
- // Use unified assembler syntax mode for Thumb.
- if (Subtarget->isThumb())
- O << "\t.syntax unified\n";
+ // Use unified assembler syntax.
+ O << "\t.syntax unified\n";
// Emit ARM Build Attributes
if (Subtarget->isTargetELF()) {
@@ -1349,7 +1361,6 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
printKill(MI);
return;
case TargetInstrInfo::INLINEASM:
- O << '\t';
printInlineAsm(MI);
return;
case TargetInstrInfo::IMPLICIT_DEF:
@@ -1365,7 +1376,8 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
// FIXME: MOVE TO SHARED PLACE.
unsigned Id = (unsigned)MI->getOperand(2).getImm();
const char *Prefix = MAI->getPrivateGlobalPrefix();
- MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix)+"PC"+Twine(Id));
+ MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix)
+ + "PC" + Twine(getFunctionNumber()) + "_" + Twine(Id));
OutStreamer.EmitLabel(Label);
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index f422798..0047925 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -259,12 +259,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
if (Modifier && strcmp(Modifier, "submode") == 0) {
ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
- if (MO1.getReg() == ARM::SP) {
- bool isFLDM = (MI->getOpcode() == ARM::FLDMD ||
- MI->getOpcode() == ARM::FLDMS);
- O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM);
- } else
- O << ARM_AM::getAMSubModeStr(Mode);
+ O << ARM_AM::getAMSubModeStr(Mode);
return;
} else if (Modifier && strcmp(Modifier, "base") == 0) {
// Used for FSTM{D|S} and LSTM{D|S} operations.
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
index 5bf966b..9e7f8d5 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
@@ -80,6 +80,10 @@ public:
void printNoHashImmediate(const MCInst *MI, unsigned OpNum);
void printVFPf32ImmOperand(const MCInst *MI, int OpNum) {}
void printVFPf64ImmOperand(const MCInst *MI, int OpNum) {}
+ void printHex8ImmOperand(const MCInst *MI, int OpNum) {}
+ void printHex16ImmOperand(const MCInst *MI, int OpNum) {}
+ void printHex32ImmOperand(const MCInst *MI, int OpNum) {}
+ void printHex64ImmOperand(const MCInst *MI, int OpNum) {}
void printPCLabel(const MCInst *MI, unsigned OpNum);
// FIXME: Implement.
diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
index 8686961..c49fee3 100644
--- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
@@ -137,6 +137,7 @@ void ARMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
if (MO.isImplicit()) continue;
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
MCOp = MCOperand::CreateReg(MO.getReg());
break;
case MachineOperand::MO_Immediate:
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index e071b61..964551f 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -17,6 +17,7 @@ add_llvm_target(ARMCodeGen
ARMCodeEmitter.cpp
ARMConstantIslandPass.cpp
ARMConstantPoolValue.cpp
+ ARMExpandPseudoInsts.cpp
ARMISelDAGToDAG.cpp
ARMISelLowering.cpp
ARMInstrInfo.cpp
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index f307e3b..7d767ec 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -54,10 +54,10 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
NextMII = next(MII);
MachineInstr *MI = &*MII;
- if (MI->getOpcode() == ARM::FCPYD &&
+ if (MI->getOpcode() == ARM::VMOVD &&
!TII->isPredicated(MI)) {
unsigned SrcReg = MI->getOperand(1).getReg();
- // If we do not found an instruction defining the reg, this means the
+ // If we do not find an instruction defining the reg, this means the
// register should be live-in for this BB. It's always to better to use
// NEON reg-reg moves.
unsigned Domain = ARMII::DomainNEON;
@@ -71,7 +71,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
}
if (Domain & ARMII::DomainNEON) {
- // Convert FCPYD to VMOVD.
+ // Convert VMOVD to VMOVDneon
unsigned DestReg = MI->getOperand(0).getReg();
DEBUG({errs() << "vmov convert: "; MI->dump();});
@@ -82,7 +82,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
// - The imp-defs / imp-uses are superregs only, we don't care about
// them.
BuildMI(MBB, *MI, MI->getDebugLoc(),
- TII->get(ARM::VMOVD), DestReg).addReg(SrcReg);
+ TII->get(ARM::VMOVDneon), DestReg).addReg(SrcReg);
MBB.erase(MI);
MachineBasicBlock::iterator I = prior(NextMII);
MI = &*I;
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index 8b2bcd0..206677b 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -177,20 +177,20 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST2LNd8:
case ARM::VST2LNd16:
case ARM::VST2LNd32:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 2;
return true;
case ARM::VST2q8:
case ARM::VST2q16:
case ARM::VST2q32:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 4;
return true;
case ARM::VST2LNq16a:
case ARM::VST2LNq32a:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 2;
Offset = 0;
Stride = 2;
@@ -198,7 +198,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST2LNq16b:
case ARM::VST2LNq32b:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 2;
Offset = 1;
Stride = 2;
@@ -211,14 +211,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST3LNd8:
case ARM::VST3LNd16:
case ARM::VST3LNd32:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 3;
return true;
case ARM::VST3q8a:
case ARM::VST3q16a:
case ARM::VST3q32a:
- FirstOpnd = 4;
+ FirstOpnd = 5;
NumRegs = 3;
Offset = 0;
Stride = 2;
@@ -227,7 +227,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST3q8b:
case ARM::VST3q16b:
case ARM::VST3q32b:
- FirstOpnd = 4;
+ FirstOpnd = 5;
NumRegs = 3;
Offset = 1;
Stride = 2;
@@ -235,7 +235,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST3LNq16a:
case ARM::VST3LNq32a:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 3;
Offset = 0;
Stride = 2;
@@ -243,7 +243,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST3LNq16b:
case ARM::VST3LNq32b:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 3;
Offset = 1;
Stride = 2;
@@ -256,14 +256,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST4LNd8:
case ARM::VST4LNd16:
case ARM::VST4LNd32:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 4;
return true;
case ARM::VST4q8a:
case ARM::VST4q16a:
case ARM::VST4q32a:
- FirstOpnd = 4;
+ FirstOpnd = 5;
NumRegs = 4;
Offset = 0;
Stride = 2;
@@ -272,7 +272,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST4q8b:
case ARM::VST4q16b:
case ARM::VST4q32b:
- FirstOpnd = 4;
+ FirstOpnd = 5;
NumRegs = 4;
Offset = 1;
Stride = 2;
@@ -280,7 +280,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST4LNq16a:
case ARM::VST4LNq32a:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 4;
Offset = 0;
Stride = 2;
@@ -288,7 +288,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST4LNq16b:
case ARM::VST4LNq32b:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 4;
Offset = 1;
Stride = 2;
diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
index e7770b2..6b605bb 100644
--- a/lib/Target/ARM/README-Thumb.txt
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -37,7 +37,7 @@ LPCRELL0:
mov r1, #PCRELV0
add r1, pc
ldr r0, [r0, r1]
- cpy pc, r0
+ mov pc, r0
.align 2
LJTI1_0_0:
.long LBB1_3
@@ -51,7 +51,7 @@ We should be able to generate:
LPCRELL0:
add r1, LJTI1_0_0
ldr r0, [r0, r1]
- cpy pc, r0
+ mov pc, r0
.align 2
LJTI1_0_0:
.long LBB1_3
@@ -206,8 +206,8 @@ LPC0:
add r5, pc
ldr r6, LCPI1_1
ldr r2, LCPI1_2
- cpy r3, r6
- cpy lr, pc
+ mov r3, r6
+ mov lr, pc
bx r5
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index fb64d9f..11c48ad 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -321,7 +321,7 @@ time.
4) Once we added support for multiple result patterns, write indexed loads
patterns instead of C++ instruction selection code.
-5) Use FLDM / FSTM to emulate indexed FP load / store.
+5) Use VLDM / VSTM to emulate indexed FP load / store.
//===---------------------------------------------------------------------===//
@@ -591,3 +591,8 @@ http://lists.cs.uiuc.edu/pipermail/llvmdev/2009-June/022763.html
//===---------------------------------------------------------------------===//
Make use of the "rbit" instruction.
+
+//===---------------------------------------------------------------------===//
+
+Take a look at test/CodeGen/Thumb2/machine-licm.ll. ARM should be taught how
+to licm and cse the unnecessary load from cp#1.
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index b6dd56c..7602b6d 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information --------*- C++ -*-===//
+//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMInstrInfo.h"
+#include "Thumb1InstrInfo.h"
#include "ARM.h"
#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 13cc578..b28229d 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ----------*- C++ -*-===//
+//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 5aaaf9c..37adf37 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------*- C++ -*-===//
+//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the Thumb-1 implementation of the TargetRegisterInfo class.
+// This file contains the Thumb-1 implementation of the TargetRegisterInfo
+// class.
//
//===----------------------------------------------------------------------===//
@@ -794,7 +795,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
if (NumBytes != 0)
emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes);
} else {
- // Unwind MBBI to point to first LDR / FLDD.
+ // Unwind MBBI to point to first LDR / VLDRD.
const unsigned *CSRegs = getCalleeSavedRegs();
if (MBBI != MBB.begin()) {
do
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 241f1cc..37ad388 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl ----*- C++ -*-===//
+//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the Thumb-1 implementation of the TargetRegisterInfo class.
+// This file contains the Thumb-1 implementation of the TargetRegisterInfo
+// class.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 462844b..f5ba155 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -1,4 +1,4 @@
-//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks -----------*- C++ -*-===//
+//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -34,10 +34,6 @@ namespace {
}
private:
- MachineBasicBlock::iterator
- SplitT2MOV32imm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- MachineInstr *MI, DebugLoc dl,
- unsigned PredReg, ARMCC::CondCodes CC);
bool InsertITBlocks(MachineBasicBlock &MBB);
};
char Thumb2ITBlockPass::ID = 0;
@@ -50,34 +46,6 @@ static ARMCC::CondCodes getPredicate(const MachineInstr *MI, unsigned &PredReg){
return llvm::getInstrPredicate(MI, PredReg);
}
-MachineBasicBlock::iterator
-Thumb2ITBlockPass::SplitT2MOV32imm(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- MachineInstr *MI,
- DebugLoc dl, unsigned PredReg,
- ARMCC::CondCodes CC) {
- // Splitting t2MOVi32imm into a pair of t2MOVi16 + t2MOVTi16 here.
- // The only reason it was a single instruction was so it could be
- // re-materialized. We want to split it before this and the thumb2
- // size reduction pass to make sure the IT mask is correct and expose
- // width reduction opportunities. It doesn't make sense to do this in a
- // separate pass so here it is.
- unsigned DstReg = MI->getOperand(0).getReg();
- bool DstDead = MI->getOperand(0).isDead(); // Is this possible?
- unsigned Imm = MI->getOperand(1).getImm();
- unsigned Lo16 = Imm & 0xffff;
- unsigned Hi16 = (Imm >> 16) & 0xffff;
- BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVi16), DstReg)
- .addImm(Lo16).addImm(CC).addReg(PredReg);
- BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVTi16))
- .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead))
- .addReg(DstReg).addImm(Hi16).addImm(CC).addReg(PredReg);
- --MBBI;
- --MBBI;
- MI->eraseFromParent();
- return MBBI;
-}
-
bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
bool Modified = false;
@@ -88,11 +56,6 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
unsigned PredReg = 0;
ARMCC::CondCodes CC = getPredicate(MI, PredReg);
- if (MI->getOpcode() == ARM::t2MOVi32imm) {
- MBBI = SplitT2MOV32imm(MBB, MBBI, MI, dl, PredReg, CC);
- continue;
- }
-
if (CC == ARMCC::AL) {
++MBBI;
continue;
@@ -115,11 +78,6 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
DebugLoc ndl = NMI->getDebugLoc();
unsigned NPredReg = 0;
ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg);
- if (NMI->getOpcode() == ARM::t2MOVi32imm) {
- MBBI = SplitT2MOV32imm(MBB, MBBI, NMI, ndl, NPredReg, NCC);
- continue;
- }
-
if (NCC == OCC) {
Mask |= (1 << Pos);
} else if (NCC != CC)
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 21fff51..16c1e6f 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------*- C++ -*-===//
+//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,9 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMInstrInfo.h"
+#include "Thumb2InstrInfo.h"
#include "ARM.h"
+#include "ARMConstantPoolValue.h"
#include "ARMAddressingModes.h"
#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
@@ -132,7 +133,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC);
}
-
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned DestReg, unsigned BaseReg, int NumBytes,
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index f3688c0..663a60b 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ----------*- C++ -*-===//
+//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
index a295630..b3cf2e5 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl ----*- C++ -*-===//
+//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the Thumb-2 implementation of the TargetRegisterInfo class.
+// This file contains the Thumb-2 implementation of the TargetRegisterInfo
+// class.
//
//===----------------------------------------------------------------------===//
OpenPOWER on IntegriCloud