path: root/lib/Target
author      rdivacky <rdivacky@FreeBSD.org>    2009-12-15 18:09:07 +0000
committer   rdivacky <rdivacky@FreeBSD.org>    2009-12-15 18:09:07 +0000
commit      40a6fcdb85efd93fe0e36c9552cfb0b18b5eacd6 (patch)
tree        076117cdf3579003f07cad4cdf0593347ce58150 /lib/Target
parent      e7908924d847e63b02bc82bfaa1709ab9c774dcd (diff)
Update LLVM to 91430.
Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/ARM/ARM.h | 1
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 9
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 5
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 96
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 22
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 341
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 12
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 52
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 19
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.h | 3
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 198
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 4
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 29
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 62
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 6
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 2
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 1
-rw-r--r--  lib/Target/ARM/NEONMoveFix.cpp | 2
-rw-r--r--  lib/Target/ARM/NEONPreAllocPass.cpp | 2
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 19
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.h | 3
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 24
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.h | 3
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 2
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.cpp | 12
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.h | 1
-rw-r--r--  lib/Target/CBackend/CBackend.cpp | 4
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.cpp | 4
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.h | 3
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 13
-rw-r--r--  lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 4
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 334
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.h | 20
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.cpp | 15
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.h | 1
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.td | 100
-rw-r--r--  lib/Target/MSP430/MSP430MachineFunctionInfo.h | 8
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.cpp | 33
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 16
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 1
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 12
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 1
-rw-r--r--  lib/Target/README.txt | 115
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 16
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.h | 1
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/TargetData.cpp | 126
-rw-r--r--  lib/Target/X86/X86.h | 5
-rw-r--r--  lib/Target/X86/X86COFFMachineModuleInfo.h | 1
-rw-r--r--  lib/Target/X86/X86CallingConv.td | 9
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 2
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 190
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 8
-rw-r--r--  lib/Target/X86/X86Instr64bit.td | 9
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 305
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 25
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 24
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 2
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 68
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 2
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 2
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.cpp | 20
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.h | 2
65 files changed, 1761 insertions, 676 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index ff1980d..21445ad 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -109,7 +109,6 @@ FunctionPass *createNEONPreAllocPass();
FunctionPass *createNEONMoveFixPass();
FunctionPass *createThumb2ITBlockPass();
FunctionPass *createThumb2SizeReductionPass();
-FunctionPass *createARMMaxStackAlignmentCalculatorPass();
extern Target TheARMTarget, TheThumbTarget;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index c95d4c8..1aae369 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -418,11 +418,13 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
return true;
}
-/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing
+/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
+DISABLE_INLINE
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
- unsigned JTI) DISABLE_INLINE;
+ unsigned JTI);
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
unsigned JTI) {
+ assert(JTI < JT.size());
return JT[JTI].MBBs.size();
}
@@ -467,6 +469,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return MI->getOperand(2).getImm();
case ARM::Int_eh_sjlj_setjmp:
return 24;
+ case ARM::tInt_eh_sjlj_setjmp:
+ return 22;
case ARM::t2Int_eh_sjlj_setjmp:
return 22;
case ARM::BR_JTr:
@@ -755,7 +759,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
assert((RC == ARM::QPRRegisterClass ||
RC == ARM::QPR_VFP2RegisterClass ||
RC == ARM::QPR_8RegisterClass) && "Unknown regclass!");
- // FIXME: Neon instructions should support predicates
if (Align >= 16
&& (getRegisterInfo().needsStackRealignment(MF))) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 282e30c..78d9135 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -92,6 +92,8 @@ namespace ARMII {
StMiscFrm = 9 << FormShift,
LdStMulFrm = 10 << FormShift,
+ LdStExFrm = 28 << FormShift,
+
// Miscellaneous arithmetic instructions
ArithMiscFrm = 11 << FormShift,
@@ -190,9 +192,6 @@ public:
// if there is not such an opcode.
virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
- // Return true if the block does not fall through.
- virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const =0;
-
virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 653328d..9b5f79f 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -471,21 +471,6 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
}
}
-static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
- unsigned MaxAlign = 0;
-
- for (int i = FFI->getObjectIndexBegin(),
- e = FFI->getObjectIndexEnd(); i != e; ++i) {
- if (FFI->isDeadObjectIndex(i))
- continue;
-
- unsigned Align = FFI->getObjectAlignment(i);
- MaxAlign = std::max(MaxAlign, Align);
- }
-
- return MaxAlign;
-}
-
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
@@ -585,16 +570,21 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
SmallVector<unsigned, 4> UnspilledCS2GPRs;
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- MachineFrameInfo *MFI = MF.getFrameInfo();
// Calculate and set max stack object alignment early, so we can decide
// whether we will need stack realignment (and thus FP).
if (RealignStack) {
- unsigned MaxAlign = std::max(MFI->getMaxAlignment(),
- calculateMaxStackAlignment(MFI));
- MFI->setMaxAlignment(MaxAlign);
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->calculateMaxStackAlignment();
}
+ // Spill R4 if Thumb2 function requires stack realignment - it will be used as
+ // scratch register.
+ // FIXME: It will be better just to find spare register here.
+ if (needsStackRealignment(MF) &&
+ AFI->isThumb2Function())
+ MF.getRegInfo().setPhysRegUsed(ARM::R4);
+
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
const unsigned *CSRegs = getCalleeSavedRegs();
@@ -1368,14 +1358,30 @@ emitPrologue(MachineFunction &MF) const {
// If we need dynamic stack realignment, do it here.
if (needsStackRealignment(MF)) {
- unsigned Opc;
unsigned MaxAlign = MFI->getMaxAlignment();
assert (!AFI->isThumb1OnlyFunction());
- Opc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri;
-
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), ARM::SP)
+ if (!AFI->isThumbFunction()) {
+ // Emit bic sp, sp, MaxAlign
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::BICri), ARM::SP)
.addReg(ARM::SP, RegState::Kill)
.addImm(MaxAlign-1)));
+ } else {
+ // We cannot use sp as source/dest register here, thus we're emitting the
+ // following sequence:
+ // mov r4, sp
+ // bic r4, r4, MaxAlign
+ // mov sp, r4
+ // FIXME: It will be better just to find spare register here.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R4)
+ .addReg(ARM::SP, RegState::Kill);
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::t2BICri), ARM::R4)
+ .addReg(ARM::R4, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
+ .addReg(ARM::R4, RegState::Kill);
+ }
}
}
@@ -1479,48 +1485,4 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
}
-namespace {
- struct MaximalStackAlignmentCalculator : public MachineFunctionPass {
- static char ID;
- MaximalStackAlignmentCalculator() : MachineFunctionPass(&ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF) {
- MachineFrameInfo *FFI = MF.getFrameInfo();
- MachineRegisterInfo &RI = MF.getRegInfo();
-
- // Calculate max stack alignment of all already allocated stack objects.
- unsigned MaxAlign = calculateMaxStackAlignment(FFI);
-
- // Be over-conservative: scan over all vreg defs and find, whether vector
- // registers are used. If yes - there is probability, that vector register
- // will be spilled and thus stack needs to be aligned properly.
- for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
- RegNum < RI.getLastVirtReg(); ++RegNum)
- MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
-
- if (FFI->getMaxAlignment() == MaxAlign)
- return false;
-
- FFI->setMaxAlignment(MaxAlign);
- return true;
- }
-
- virtual const char *getPassName() const {
- return "ARM Stack Required Alignment Auto-Detector";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-
- char MaximalStackAlignmentCalculator::ID = 0;
-}
-
-FunctionPass*
-llvm::createARMMaxStackAlignmentCalculatorPass() {
- return new MaximalStackAlignmentCalculator();
-}
-
#include "ARMGenRegisterInfo.inc"
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index e59a315..acd30d2 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -418,10 +418,10 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
static bool BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
MachineFunction::iterator MBBI = MBB;
- if (next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function.
+ if (llvm::next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function.
return false;
- MachineBasicBlock *NextBB = next(MBBI);
+ MachineBasicBlock *NextBB = llvm::next(MBBI);
for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I)
if (*I == NextBB)
@@ -760,7 +760,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
CompareMBBNumbers);
MachineBasicBlock* WaterBB = *IP;
if (WaterBB == OrigBB)
- WaterList.insert(next(IP), NewBB);
+ WaterList.insert(llvm::next(IP), NewBB);
else
WaterList.insert(IP, OrigBB);
NewWaterList.insert(OrigBB);
@@ -887,7 +887,7 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
int delta) {
- MachineFunction::iterator MBBI = BB; MBBI = next(MBBI);
+ MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI);
for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs();
i < e; ++i) {
BBOffsets[i] += delta;
@@ -929,7 +929,7 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
if (delta==0)
return;
}
- MBBI = next(MBBI);
+ MBBI = llvm::next(MBBI);
}
}
@@ -1096,7 +1096,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
DEBUG(errs() << "Split at end of block\n");
if (&UserMBB->back() == UserMI)
assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
- NewMBB = next(MachineFunction::iterator(UserMBB));
+ NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
// Add an unconditional branch from UserMBB to fallthrough block.
// Record it for branch lengthening; this new branch will not get out of
// range, but if the preceding conditional branch is out of range, the
@@ -1144,7 +1144,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
Offset < BaseInsertOffset;
Offset += TII->GetInstSizeInBytes(MI),
- MI = next(MI)) {
+ MI = llvm::next(MI)) {
if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) {
CPUser &U = CPUsers[CPUIndex];
if (!OffsetIsInRange(Offset, EndInsertOffset,
@@ -1204,7 +1204,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
NewWaterList.insert(NewIsland);
}
// The new CPE goes before the following block (NewMBB).
- NewMBB = next(MachineFunction::iterator(WaterBB));
+ NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
} else {
// No water found.
@@ -1406,7 +1406,7 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
NumCBrFixed++;
if (BMI != MI) {
- if (next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
BMI->getOpcode() == Br.UncondBr) {
// Last MI in the BB is an unconditional branch. Can we simply invert the
// condition and swap destinations:
@@ -1433,12 +1433,12 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
// branch to the destination.
int delta = TII->GetInstSizeInBytes(&MBB->back());
BBSizes[MBB->getNumber()] -= delta;
- MachineBasicBlock* SplitBB = next(MachineFunction::iterator(MBB));
+ MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB));
AdjustBBOffsetsAfter(SplitBB, -delta);
MBB->back().eraseFromParent();
// BBOffsets[SplitBB] is wrong temporarily, fixed below
}
- MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB));
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber()
<< " also invert condition and change dest. to BB#"
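A note on the recurring next -> llvm::next changes in this and the following files (background reasoning, not stated in the patch): once <iterator> brings C++0x's std::next into scope, an unqualified next(It) on an LLVM ilist iterator can become ambiguous through argument-dependent lookup, so call sites now name LLVM's own helper explicitly. A minimal sketch of the pattern, assuming the usual STLExtras header:

    #include "llvm/ADT/STLExtras.h"  // declares the llvm::next helper
    #include <list>

    void advanceExample(std::list<int> &L) {
      std::list<int>::iterator I = L.begin();
      std::list<int>::iterator N = llvm::next(I);  // explicitly qualified, cannot collide
      (void)N;
    }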
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index c929c54..1b8727d 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -48,7 +48,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
while (MBBI != E) {
MachineInstr &MI = *MBBI;
- MachineBasicBlock::iterator NMBBI = next(MBBI);
+ MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index c839fc6..655c762 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -42,6 +42,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include <sstream>
using namespace llvm;
@@ -377,7 +378,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
@@ -500,6 +501,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
+ case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
+ case ARMISD::SYNCBARRIER: return "ARMISD::SYNCBARRIER";
+
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCGE: return "ARMISD::VCGE";
case ARMISD::VCGEU: return "ARMISD::VCGEU";
@@ -1470,6 +1474,28 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
}
}
+static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Op5 = Op.getOperand(5);
+ SDValue Res;
+ unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
+ if (isDeviceBarrier) {
+ if (Subtarget->hasV7Ops())
+ Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0));
+ else
+ Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ } else {
+ if (Subtarget->hasV7Ops())
+ Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
+ else
+ Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ }
+ return Res;
+}
+
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
unsigned VarArgsFrameIndex) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
@@ -2528,6 +2554,25 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
+/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
+static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if ((unsigned) M[i] != i + WhichResult ||
+ (unsigned) M[i+1] != i + WhichResult)
+ return false;
+ }
+ return true;
+}
+
static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
@@ -2548,6 +2593,33 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
+/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
+static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned Half = VT.getVectorNumElements() / 2;
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned j = 0; j != 2; ++j) {
+ unsigned Idx = WhichResult;
+ for (unsigned i = 0; i != Half; ++i) {
+ if ((unsigned) M[i + j * Half] != Idx)
+ return false;
+ Idx += 2;
+ }
+ }
+
+ // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
@@ -2571,6 +2643,33 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
+/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
+static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned i = 0; i != NumElts; i += 2) {
+ if ((unsigned) M[i] != Idx ||
+ (unsigned) M[i+1] != Idx)
+ return false;
+ Idx += 1;
+ }
+
+ // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+
static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) {
// Canonicalize all-zeros and all-ones vectors.
ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode());
@@ -2683,7 +2782,10 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
isVEXTMask(M, VT, ReverseVEXT, Imm) ||
isVTRNMask(M, VT, WhichResult) ||
isVUZPMask(M, VT, WhichResult) ||
- isVZIPMask(M, VT, WhichResult));
+ isVZIPMask(M, VT, WhichResult) ||
+ isVTRN_v_undef_Mask(M, VT, WhichResult) ||
+ isVUZP_v_undef_Mask(M, VT, WhichResult) ||
+ isVZIP_v_undef_Mask(M, VT, WhichResult));
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
@@ -2815,6 +2917,16 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
V1, V2).getValue(WhichResult);
+ if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
if (VT.getVectorNumElements() == 4 &&
@@ -2886,6 +2998,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex);
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT:
@@ -2938,14 +3051,233 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
//===----------------------------------------------------------------------===//
MachineBasicBlock *
+ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned oldval = MI->getOperand(2).getReg();
+ unsigned newval = MI->getOperand(3).getReg();
+ unsigned scratch = BB->getParent()->getRegInfo()
+ .createVirtualRegister(ARM::GPRRegisterClass);
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ unsigned ldrOpc, strOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ break;
+ }
+
+ MachineFunction *MF = BB->getParent();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It; // insert the new blocks after the current block
+
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, exitMBB);
+ exitMBB->transferSuccessors(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // ldrex dest, [ptr]
+ // cmp dest, oldval
+ // bne exitMBB
+ BB = loop1MBB;
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(dest).addReg(oldval));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(exitMBB);
+
+ // loop2MBB:
+ // strex scratch, newval, [ptr]
+ // cmp scratch, #0
+ // bne loop1MBB
+ BB = loop2MBB;
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
+ .addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ return BB;
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *F = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+ unsigned ldrOpc, strOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ break;
+ }
+
+ MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loopMBB);
+ F->insert(It, exitMBB);
+ exitMBB->transferSuccessors(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+ unsigned scratch2 = (!BinOpcode) ? incr :
+ RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldrex dest, ptr
+ // <binop> scratch2, dest, incr
+ // strex scratch, scratch2, ptr
+ // cmp scratch, #0
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+ if (BinOpcode) {
+ // operand order needs to go the other way for NAND
+ if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+ addReg(incr).addReg(dest)).addReg(0);
+ else
+ AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+ addReg(dest).addReg(incr)).addReg(0);
+ }
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
+ .addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ return BB;
+}
+
+MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB,
DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
switch (MI->getOpcode()) {
default:
+ MI->dump();
llvm_unreachable("Unexpected instr type to insert");
+
+ case ARM::ATOMIC_LOAD_ADD_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+ case ARM::ATOMIC_LOAD_ADD_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+ case ARM::ATOMIC_LOAD_ADD_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+
+ case ARM::ATOMIC_LOAD_AND_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMIC_LOAD_AND_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMIC_LOAD_AND_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+
+ case ARM::ATOMIC_LOAD_OR_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMIC_LOAD_OR_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMIC_LOAD_OR_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+
+ case ARM::ATOMIC_LOAD_XOR_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMIC_LOAD_XOR_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMIC_LOAD_XOR_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+
+ case ARM::ATOMIC_LOAD_NAND_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+ case ARM::ATOMIC_LOAD_NAND_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+ case ARM::ATOMIC_LOAD_NAND_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+
+ case ARM::ATOMIC_LOAD_SUB_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+ case ARM::ATOMIC_LOAD_SUB_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+ case ARM::ATOMIC_LOAD_SUB_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+
+ case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0);
+ case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
+ case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
+
+ case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1);
+ case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
+ case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
+
case ARM::tMOVCCr_pseudo: {
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@@ -3935,6 +4267,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return std::make_pair(0U, ARM::SPRRegisterClass);
if (VT == MVT::f64)
return std::make_pair(0U, ARM::DPRRegisterClass);
+ if (VT.getSizeInBits() == 128)
+ return std::make_pair(0U, ARM::QPRRegisterClass);
break;
}
}
@@ -3973,6 +4307,9 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
ARM::D4, ARM::D5, ARM::D6, ARM::D7,
ARM::D8, ARM::D9, ARM::D10,ARM::D11,
ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
+ if (VT.getSizeInBits() == 128)
+ return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
+ ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0);
break;
}
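For readers following the new EmitAtomicCmpSwap and EmitAtomicBinary expansions above: both emit the usual load-exclusive/store-exclusive retry loops (ldrex/strex or their Thumb2 forms). A minimal C++ sketch of the semantics the pseudo-instructions provide, assuming both return the previously loaded value (illustration only, not code from the patch):

    #include <atomic>
    #include <cstdint>

    // ATOMIC_CMP_SWAP_I32: loop1MBB loads and compares, loop2MBB retries the store.
    std::uint32_t cmp_swap_32(std::atomic<std::uint32_t> &v,
                              std::uint32_t oldval, std::uint32_t newval) {
      std::uint32_t observed = oldval;
      v.compare_exchange_strong(observed, newval);  // the retry loop lives in here
      return observed;                              // the value the 'dest' vreg ends up with
    }

    // ATOMIC_LOAD_ADD_I32: ldrex, add, strex, branch back while strex reports failure.
    std::uint32_t load_add_32(std::atomic<std::uint32_t> &v, std::uint32_t incr) {
      return v.fetch_add(incr);                     // returns the old value, like $dst
    }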
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 4f31f8a..e1b3348 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -72,6 +72,9 @@ namespace llvm {
DYN_ALLOC, // Dynamic allocation on the stack.
+ MEMBARRIER, // Memory barrier
+ SYNCBARRIER, // Memory sync barrier
+
VCEQ, // Vector compare equal.
VCGE, // Vector compare greater than or equal.
VCGEU, // Vector compare unsigned greater than or equal.
@@ -328,6 +331,15 @@ namespace llvm {
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl);
+
+ MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const;
+ MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ unsigned BinOpcode) const;
+
};
}
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index e76e93c..cf0edff 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -33,6 +33,8 @@ def LdMiscFrm : Format<8>;
def StMiscFrm : Format<9>;
def LdStMulFrm : Format<10>;
+def LdStExFrm : Format<28>;
+
def ArithMiscFrm : Format<11>;
def ExtFrm : Format<12>;
@@ -199,6 +201,19 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
let Pattern = pattern;
list<Predicate> Predicates = [IsARM];
}
+// A few are not predicable
+class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = !strconcat(opc, asm);
+ let Pattern = pattern;
+ let isPredicable = 0;
+ list<Predicate> Predicates = [IsARM];
+}
// Same as I except it can optionally modify CPSR. Note it's modeled as
// an input operand since by default it's a zero register. It will
@@ -239,6 +254,10 @@ class AXI<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
: XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern>;
+class AInoP<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : InoP<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
// Ctrl flow instructions
class ABI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
@@ -264,6 +283,28 @@ class JTI<dag oops, dag iops, InstrItinClass itin,
: XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, itin,
asm, "", pattern>;
+
+// Atomic load/store instructions
+
+class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{27-23} = 0b00011;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1;
+ let Inst{11-0} = 0b111110011111;
+}
+class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{27-23} = 0b00011;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0;
+ let Inst{11-4} = 0b11111001;
+}
+
// addrmode1 instructions
class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -967,6 +1008,17 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
list<Predicate> Predicates = [IsThumb2];
}
+class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb1Only];
+}
+
class T2I<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "", pattern>;
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 87bb12b..85f6b40 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -60,25 +60,6 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
-bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case ARM::BX_RET: // Return.
- case ARM::LDM_RET:
- case ARM::B:
- case ARM::BRIND:
- case ARM::BR_JTr: // Jumptable branch.
- case ARM::BR_JTm: // Jumptable branch through mem.
- case ARM::BR_JTadd: // Jumptable branch add to pc.
- return true;
- default:
- break;
- }
-
- return false;
-}
-
void ARMInstrInfo::
reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig,
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 4319577..d4199d1 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -32,9 +32,6 @@ public:
// if there is not such an opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
- // Return true if the block does not fall through.
- bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
-
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 7516d3c..e14696a 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -46,6 +46,11 @@ def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>;
+def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>;
+def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>;
+def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
@@ -93,6 +98,15 @@ def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInFlag ]>;
def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>;
+def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7,
+ [SDNPHasChain]>;
+def ARMSyncBarrierV7 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV7,
+ [SDNPHasChain]>;
+def ARMMemBarrierV6 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV6,
+ [SDNPHasChain]>;
+def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6,
+ [SDNPHasChain]>;
+
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
@@ -772,6 +786,7 @@ let isBranch = 1, isTerminator = 1 in {
def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
IIC_Br, "mov\tpc, $target \n$jt",
[(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> {
+ let Inst{11-4} = 0b00000000;
let Inst{15-12} = 0b1111;
let Inst{20} = 0; // S Bit
let Inst{24-21} = 0b1101;
@@ -1561,6 +1576,189 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst),
let Inst{25} = 1;
}
+//===----------------------------------------------------------------------===//
+// Atomic operations intrinsics
+//
+
+// memory barriers protect the atomic sequences
+let hasSideEffects = 1 in {
+def Int_MemBarrierV7 : AInoP<(outs), (ins),
+ Pseudo, NoItinerary,
+ "dmb", "",
+ [(ARMMemBarrierV7)]>,
+ Requires<[IsARM, HasV7]> {
+ let Inst{31-4} = 0xf57ff05;
+ // FIXME: add support for options other than a full system DMB
+ let Inst{3-0} = 0b1111;
+}
+
+def Int_SyncBarrierV7 : AInoP<(outs), (ins),
+ Pseudo, NoItinerary,
+ "dsb", "",
+ [(ARMSyncBarrierV7)]>,
+ Requires<[IsARM, HasV7]> {
+ let Inst{31-4} = 0xf57ff04;
+ // FIXME: add support for options other than a full system DSB
+ let Inst{3-0} = 0b1111;
+}
+
+def Int_MemBarrierV6 : AInoP<(outs), (ins GPR:$zero),
+ Pseudo, NoItinerary,
+ "mcr", "\tp15, 0, $zero, c7, c10, 5",
+ [(ARMMemBarrierV6 GPR:$zero)]>,
+ Requires<[IsARM, HasV6]> {
+ // FIXME: add support for options other than a full system DMB
+ // FIXME: add encoding
+}
+
+def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero),
+ Pseudo, NoItinerary,
+ "mcr", "\tp15, 0, $zero, c7, c10, 4",
+ [(ARMSyncBarrierV6 GPR:$zero)]>,
+ Requires<[IsARM, HasV6]> {
+ // FIXME: add support for options other than a full system DSB
+ // FIXME: add encoding
+}
+}
+
+let usesCustomInserter = 1 in {
+ let Uses = [CPSR] in {
+ def ATOMIC_LOAD_ADD_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_ADD_I8 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_SUB_I8 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_AND_I8 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_OR_I8 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_XOR_I8 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_NAND_I8 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_ADD_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_ADD_I16 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_SUB_I16 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_AND_I16 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_OR_I16 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_XOR_I16 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_NAND_I16 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_ADD_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_ADD_I32 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_SUB_I32 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_AND_I32 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_OR_I32 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_XOR_I32 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_NAND_I32 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
+
+ def ATOMIC_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ "${:comment} ATOMIC_SWAP_I8 PSEUDO!",
+ [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>;
+ def ATOMIC_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ "${:comment} ATOMIC_SWAP_I16 PSEUDO!",
+ [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>;
+ def ATOMIC_SWAP_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ "${:comment} ATOMIC_SWAP_I32 PSEUDO!",
+ [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ "${:comment} ATOMIC_CMP_SWAP_I8 PSEUDO!",
+ [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>;
+ def ATOMIC_CMP_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ "${:comment} ATOMIC_CMP_SWAP_I16 PSEUDO!",
+ [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>;
+ def ATOMIC_CMP_SWAP_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ "${:comment} ATOMIC_CMP_SWAP_I32 PSEUDO!",
+ [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>;
+}
+}
+
+let mayLoad = 1 in {
+def LDREXB : AIldrex<0b10, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary,
+ "ldrexb", "\t$dest, [$ptr]",
+ []>;
+def LDREXH : AIldrex<0b11, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary,
+ "ldrexh", "\t$dest, [$ptr]",
+ []>;
+def LDREX : AIldrex<0b00, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary,
+ "ldrex", "\t$dest, [$ptr]",
+ []>;
+def LDREXD : AIldrex<0b01, (outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr),
+ NoItinerary,
+ "ldrexd", "\t$dest, $dest2, [$ptr]",
+ []>;
+}
+
+let mayStore = 1 in {
+def STREXB : AIstrex<0b10, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+ NoItinerary,
+ "strexb", "\t$success, $src, [$ptr]",
+ []>;
+def STREXH : AIstrex<0b11, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+ NoItinerary,
+ "strexh", "\t$success, $src, [$ptr]",
+ []>;
+def STREX : AIstrex<0b00, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+ NoItinerary,
+ "strex", "\t$success, $src, [$ptr]",
+ []>;
+def STREXD : AIstrex<0b01, (outs GPR:$success),
+ (ins GPR:$src, GPR:$src2, GPR:$ptr),
+ NoItinerary,
+ "strexd", "\t$success, $src, $src2, [$ptr]",
+ []>;
+}
//===----------------------------------------------------------------------===//
// TLS Instructions
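The Int_MemBarrierV6/Int_SyncBarrierV6 definitions above use the ARMv6 CP15 forms of the barriers, since dedicated dmb/dsb instructions only exist from ARMv7 on (hence the HasV7 requirement on the other two). A hedged illustration of the equivalent sequences as GCC-style inline assembly (for orientation only, not part of the patch):

    // ARMv7 and later: dedicated barrier instruction.
    static inline void full_memory_barrier_v7() {
      asm volatile("dmb" ::: "memory");
    }

    // ARMv6: the same operation issued through the system control coprocessor.
    static inline void full_memory_barrier_v6() {
      asm volatile("mcr p15, 0, %0, c7, c10, 5" :: "r"(0) : "memory");  // DMB
    }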
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 3166931..61b7705 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -152,7 +152,7 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
let Inst{24} = 0; // P bit
let Inst{23} = 1; // U bit
let Inst{20} = 1;
- let Inst{11-9} = 0b101;
+ let Inst{11-8} = 0b1011;
}
// Use vstmia to store a Q register as a D register pair.
@@ -164,7 +164,7 @@ def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr),
let Inst{24} = 0; // P bit
let Inst{23} = 1; // U bit
let Inst{20} = 0;
- let Inst{11-9} = 0b101;
+ let Inst{11-8} = 0b1011;
}
// VLD1 : Vector Load (multiple single elements)
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index b5956a3..9306bdb 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -669,6 +669,35 @@ let isCall = 1,
[(set R0, ARMthread_pointer)]>;
}
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is an instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved registers, which is exactly what we want.
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ] in {
+ def tInt_eh_sjlj_setjmp : ThumbXI<(outs), (ins GPR:$src),
+ AddrModeNone, SizeSpecial, NoItinerary,
+ "mov\tr12, r1\t@ begin eh.setjmp\n"
+ "\tmov\tr1, sp\n"
+ "\tstr\tr1, [$src, #8]\n"
+ "\tadr\tr1, 0f\n"
+ "\tadds\tr1, #1\n"
+ "\tstr\tr1, [$src, #4]\n"
+ "\tmov\tr1, r12\n"
+ "\tmovs\tr0, #0\n"
+ "\tb\t1f\n"
+ ".align 2\n"
+ "0:\tmovs\tr0, #1\t@ end eh.setjmp\n"
+ "1:", "",
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>;
+}
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
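The tInt_eh_sjlj_setjmp sequence added above mirrors the existing ARM and Thumb2 variants: it saves sp and a (Thumb-bit-adjusted) resume address into the buffer, returns 0 on the direct path, and the stored label leaves 1 in r0 when control comes back on the longjmp side. A small sketch of the observable behaviour being modeled, using the standard library rather than the EH intrinsics (illustrative assumption, not from the patch):

    #include <csetjmp>

    static std::jmp_buf buf;

    int probe() {
      if (setjmp(buf) == 0)
        return 0;  // direct return: the sequence leaves 0 in r0
      return 1;    // re-entered via longjmp: the stored label puts 1 in r0
    }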
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 9489815..949ce73 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1065,6 +1065,68 @@ def t2MOVCCror : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs),
RegConstraint<"$false = $dst">;
//===----------------------------------------------------------------------===//
+// Atomic operations intrinsics
+//
+
+// memory barriers protect the atomic sequences
+let hasSideEffects = 1 in {
+def t2Int_MemBarrierV7 : AInoP<(outs), (ins),
+ Pseudo, NoItinerary,
+ "dmb", "",
+ [(ARMMemBarrierV7)]>,
+ Requires<[IsThumb2]> {
+ // FIXME: add support for options other than a full system DMB
+}
+
+def t2Int_SyncBarrierV7 : AInoP<(outs), (ins),
+ Pseudo, NoItinerary,
+ "dsb", "",
+ [(ARMSyncBarrierV7)]>,
+ Requires<[IsThumb2]> {
+ // FIXME: add support for options other than a full system DSB
+}
+}
+
+let mayLoad = 1 in {
+def t2LDREXB : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+ Size4Bytes, NoItinerary,
+ "ldrexb", "\t$dest, [$ptr]", "",
+ []>;
+def t2LDREXH : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+ Size4Bytes, NoItinerary,
+ "ldrexh", "\t$dest, [$ptr]", "",
+ []>;
+def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+ Size4Bytes, NoItinerary,
+ "ldrex", "\t$dest, [$ptr]", "",
+ []>;
+def t2LDREXD : Thumb2I<(outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr),
+ AddrModeNone, Size4Bytes, NoItinerary,
+ "ldrexd", "\t$dest, $dest2, [$ptr]", "",
+ []>;
+}
+
+let mayStore = 1 in {
+def t2STREXB : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+ AddrModeNone, Size4Bytes, NoItinerary,
+ "strexb", "\t$success, $src, [$ptr]", "",
+ []>;
+def t2STREXH : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+ AddrModeNone, Size4Bytes, NoItinerary,
+ "strexh", "\t$success, $src, [$ptr]", "",
+ []>;
+def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+ AddrModeNone, Size4Bytes, NoItinerary,
+ "strex", "\t$success, $src, [$ptr]", "",
+ []>;
+def t2STREXD : Thumb2I<(outs GPR:$success),
+ (ins GPR:$src, GPR:$src2, GPR:$ptr),
+ AddrModeNone, Size4Bytes, NoItinerary,
+ "strexd", "\t$success, $src, $src2, [$ptr]", "",
+ []>;
+}
+
+//===----------------------------------------------------------------------===//
// TLS Instructions
//
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 304d0ef..22bd80e 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -449,7 +449,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
}
if (MBBI != MBB.end()) {
- MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
@@ -494,7 +494,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
}
if (MBBI != MBB.end()) {
- MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
if (Mode == ARM_AM::ia &&
isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
@@ -604,7 +604,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
}
if (!DoMerge && MBBI != MBB.end()) {
- MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
if (!isAM5 &&
isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
DoMerge = true;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 2564ed9..1c6fca7 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -95,7 +95,7 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
// Calculate and set max stack object alignment early, so we can decide
// whether we will need stack realignment (and thus FP).
- PM.add(createARMMaxStackAlignmentCalculatorPass());
+ PM.add(createMaxStackAlignmentCalculatorPass());
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 692bb19..362bbf1 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -1045,6 +1045,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
printNoHashImmediate(MI, OpNum);
return false;
case 'P': // Print a VFP double precision register.
+ case 'q': // Print a NEON quad precision register.
printOperand(MI, OpNum);
return false;
case 'Q':
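The new 'q' case lets ARM inline assembly print a NEON quad register operand, matching the QPR register class support added to the inline-asm constraint handling in ARMISelLowering.cpp above. A hedged usage illustration with GCC-style inline assembly (the "w" constraint and %q template modifier are assumptions about the front end, not something this patch defines):

    #include <arm_neon.h>

    // Adds two 128-bit vectors held in NEON quad registers.
    float32x4_t add4(float32x4_t a, float32x4_t b) {
      float32x4_t r;
      asm("vadd.f32 %q0, %q1, %q2" : "=w"(r) : "w"(a), "w"(b));
      return r;
    }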
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index 50abcf4..3c0414d 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -51,7 +51,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
MachineBasicBlock::iterator NextMII;
for (; MII != E; MII = NextMII) {
- NextMII = next(MII);
+ NextMII = llvm::next(MII);
MachineInstr *MI = &*MII;
if (MI->getOpcode() == ARM::VMOVD &&
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index 206677b..d9942c8 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -338,7 +338,7 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride))
continue;
- MachineBasicBlock::iterator NextI = next(MBBI);
+ MachineBasicBlock::iterator NextI = llvm::next(MBBI);
for (unsigned R = 0; R < NumRegs; ++R) {
MachineOperand &MO = MI->getOperand(FirstOpnd + R);
assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 7602b6d..66d3b83 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -32,25 +32,6 @@ unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
-bool
-Thumb1InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case ARM::tBX_RET:
- case ARM::tBX_RET_vararg:
- case ARM::tPOP_RET:
- case ARM::tB:
- case ARM::tBRIND:
- case ARM::tBR_JTr:
- return true;
- default:
- break;
- }
-
- return false;
-}
-
bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index b28229d..516ddf1 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -31,9 +31,6 @@ public:
// if there is not such an opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
- // Return true if the block does not fall through.
- bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
-
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 37adf37..9f3816a 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -528,7 +528,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i+1).ChangeToImmediate(Mask);
}
Offset = (Offset - Mask * Scale);
- MachineBasicBlock::iterator NII = next(II);
+ MachineBasicBlock::iterator NII = llvm::next(II);
emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII,
*this, dl);
} else {
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 16c1e6f..f4a8c27 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -36,30 +36,6 @@ unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
}
bool
-Thumb2InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case ARM::t2LDM_RET:
- case ARM::t2B: // Uncond branch.
- case ARM::t2BR_JT: // Jumptable branch.
- case ARM::t2TBB: // Table branch byte.
- case ARM::t2TBH: // Table branch halfword.
- case ARM::tBR_JTr: // Jumptable branch (16-bit version).
- case ARM::tBX_RET:
- case ARM::tBX_RET_vararg:
- case ARM::tPOP_RET:
- case ARM::tB:
- case ARM::tBRIND:
- return true;
- default:
- break;
- }
-
- return false;
-}
-
-bool
Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 663a60b..a0f89a6 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -31,9 +31,6 @@ public:
// if there is not such an opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
- // Return true if the block does not fall through.
- bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
-
bool copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index b2fd7b3..35359aa 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -649,7 +649,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
MachineBasicBlock::iterator NextMII;
for (; MII != E; MII = NextMII) {
- NextMII = next(MII);
+ NextMII = llvm::next(MII);
MachineInstr *MI = &*MII;
LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index 86173ff..39f0749 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -392,18 +392,6 @@ void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB,
.addReg(Alpha::R31);
}
-bool AlphaInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case Alpha::RETDAG: // Return.
- case Alpha::RETDAGp:
- case Alpha::BR: // Uncond branch.
- case Alpha::JMP: // Indirect branch.
- return true;
- default: return false;
- }
-}
bool AlphaInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 2 && "Invalid Alpha branch opcode!");
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
index 274f452..c3b6044 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.h
+++ b/lib/Target/Alpha/AlphaInstrInfo.h
@@ -78,7 +78,6 @@ public:
unsigned RemoveBranch(MachineBasicBlock &MBB) const;
void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
- bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
/// getGlobalBaseReg - Return a virtual register initialized with the
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 9e4fe27..a52ca79 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -2573,7 +2573,7 @@ void CWriter::visitSwitchInst(SwitchInst &SI) {
BasicBlock *Succ = cast<BasicBlock>(SI.getOperand(i+1));
printPHICopiesForSuccessor (SI.getParent(), Succ, 2);
printBranchToBlock(SI.getParent(), Succ, 2);
- if (Function::iterator(Succ) == next(Function::iterator(SI.getParent())))
+ if (Function::iterator(Succ) == llvm::next(Function::iterator(SI.getParent())))
Out << " break;\n";
}
Out << " }\n";
@@ -2593,7 +2593,7 @@ bool CWriter::isGotoCodeNecessary(BasicBlock *From, BasicBlock *To) {
/// FIXME: This should be reenabled, but loop reordering safe!!
return true;
- if (next(Function::iterator(From)) != Function::iterator(To))
+ if (llvm::next(Function::iterator(From)) != Function::iterator(To))
return true; // Not the direct successor, we need a goto.
//isa<SwitchInst>(From->getTerminator())
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index ecce8e3..2306665 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -580,10 +580,6 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
}
}
-bool
-SPUInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
- return (!MBB.empty() && isUncondBranch(&MBB.back()));
-}
//! Reverses a branch's condition, returning false on success.
bool
SPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index c644a11..42677fc 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -79,9 +79,6 @@ namespace llvm {
bool canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const;
- //! Return true if the specified block does not fall through
- virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
-
//! Reverses a branch's condition, returning false on success.
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 4bae6c7..a872fbd 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -976,21 +976,20 @@ namespace {
nl(Out);
printType(GV->getType());
if (GV->hasInitializer()) {
- Constant* Init = GV->getInitializer();
+ Constant *Init = GV->getInitializer();
printType(Init->getType());
- if (Function* F = dyn_cast<Function>(Init)) {
+ if (Function *F = dyn_cast<Function>(Init)) {
nl(Out)<< "/ Function Declarations"; nl(Out);
printFunctionHead(F);
} else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
nl(Out) << "// Global Variable Declarations"; nl(Out);
printVariableHead(gv);
- } else {
- nl(Out) << "// Constant Definitions"; nl(Out);
- printConstant(gv);
- }
- if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
+
nl(Out) << "// Global Variable Definitions"; nl(Out);
printVariableBody(gv);
+ } else {
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ printConstant(Init);
}
}
}
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index beccb2c..4edf422 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -86,10 +86,10 @@ namespace {
void dump() {
errs() << "MSP430ISelAddressMode " << this << '\n';
- if (Base.Reg.getNode() != 0) {
+ if (BaseType == RegBase && Base.Reg.getNode() != 0) {
errs() << "Base.Reg ";
Base.Reg.getNode()->dump();
- } else {
+ } else if (BaseType == FrameIndexBase) {
errs() << " Base.FrameIndex " << Base.FrameIndex << '\n';
}
errs() << " Disp " << Disp << '\n';
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 29cc370..5fe9b20 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -15,6 +15,7 @@
#include "MSP430ISelLowering.h"
#include "MSP430.h"
+#include "MSP430MachineFunctionInfo.h"
#include "MSP430TargetMachine.h"
#include "MSP430Subtarget.h"
#include "llvm/DerivedTypes.h"
@@ -32,16 +33,38 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/VectorExtras.h"
using namespace llvm;
+typedef enum {
+ NoHWMult,
+ HWMultIntr,
+ HWMultNoIntr
+} HWMultUseMode;
+
+static cl::opt<HWMultUseMode>
+HWMultMode("msp430-hwmult-mode",
+ cl::desc("Hardware multiplier use mode"),
+ cl::init(HWMultNoIntr),
+ cl::values(
+ clEnumValN(NoHWMult, "no",
+ "Do not use hardware multiplier"),
+ clEnumValN(HWMultIntr, "interrupts",
+ "Assume hardware multiplier can be used inside interrupts"),
+ clEnumValN(HWMultNoIntr, "use",
+ "Assume hardware multiplier cannot be used inside interrupts"),
+ clEnumValEnd));
+
MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
TargetLowering(tm, new TargetLoweringObjectFileELF()),
Subtarget(*tm.getSubtargetImpl()), TM(tm) {
+ TD = getTargetData();
+
// Set up the register classes.
addRegisterClass(MVT::i8, MSP430::GR8RegisterClass);
addRegisterClass(MVT::i16, MSP430::GR16RegisterClass);
@@ -92,8 +115,8 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setOperationAction(ISD::BR_CC, MVT::i8, Custom);
setOperationAction(ISD::BR_CC, MVT::i16, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
- setOperationAction(ISD::SETCC, MVT::i8, Expand);
- setOperationAction(ISD::SETCC, MVT::i16, Expand);
+ setOperationAction(ISD::SETCC, MVT::i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::i16, Custom);
setOperationAction(ISD::SELECT, MVT::i8, Expand);
setOperationAction(ISD::SELECT, MVT::i16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
@@ -142,6 +165,15 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setOperationAction(ISD::SDIV, MVT::i16, Expand);
setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
setOperationAction(ISD::SREM, MVT::i16, Expand);
+
+  // Libcall names.
+ if (HWMultMode == HWMultIntr) {
+ setLibcallName(RTLIB::MUL_I8, "__mulqi3hw");
+ setLibcallName(RTLIB::MUL_I16, "__mulhi3hw");
+ } else if (HWMultMode == HWMultNoIntr) {
+ setLibcallName(RTLIB::MUL_I8, "__mulqi3hw_noint");
+ setLibcallName(RTLIB::MUL_I16, "__mulhi3hw_noint");
+ }
}
SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
@@ -151,9 +183,12 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::SRA: return LowerShifts(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
default:
llvm_unreachable("unimplemented operand");
return SDValue();
@@ -225,6 +260,13 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain,
case CallingConv::C:
case CallingConv::Fast:
return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals);
+ case CallingConv::MSP430_INTR:
+ if (Ins.empty())
+ return Chain;
+ else {
+ llvm_report_error("ISRs cannot have arguments");
+ return SDValue();
+ }
}
}
@@ -244,6 +286,9 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
case CallingConv::C:
return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
Outs, Ins, dl, DAG, InVals);
+ case CallingConv::MSP430_INTR:
+ llvm_report_error("ISRs cannot be called directly");
+ return SDValue();
}
}
@@ -340,6 +385,12 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
// CCValAssign - represent the assignment of the return value to a location
SmallVector<CCValAssign, 16> RVLocs;
+ // ISRs cannot return any value.
+ if (CallConv == CallingConv::MSP430_INTR && !Outs.empty()) {
+ llvm_report_error("ISRs cannot return any value");
+ return SDValue();
+ }
+
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
RVLocs, *DAG.getContext());
@@ -370,11 +421,14 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
Flag = Chain.getValue(1);
}
+ unsigned Opc = (CallConv == CallingConv::MSP430_INTR ?
+ MSP430ISD::RETI_FLAG : MSP430ISD::RET_FLAG);
+
if (Flag.getNode())
- return DAG.getNode(MSP430ISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ return DAG.getNode(Opc, dl, MVT::Other, Chain, Flag);
// Return Void
- return DAG.getNode(MSP430ISD::RET_FLAG, dl, MVT::Other, Chain);
+ return DAG.getNode(Opc, dl, MVT::Other, Chain);
}
/// LowerCCCCallTo - functions arguments are copied from virtual regs to
@@ -538,9 +592,21 @@ SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
EVT VT = Op.getValueType();
DebugLoc dl = N->getDebugLoc();
- // We currently only lower shifts of constant argument.
+ // Expand non-constant shifts to loops:
if (!isa<ConstantSDNode>(N->getOperand(1)))
- return SDValue();
+ switch (Opc) {
+ default:
+ assert(0 && "Invalid shift opcode!");
+ case ISD::SHL:
+ return DAG.getNode(MSP430ISD::SHL, dl,
+ VT, N->getOperand(0), N->getOperand(1));
+ case ISD::SRA:
+ return DAG.getNode(MSP430ISD::SRA, dl,
+ VT, N->getOperand(0), N->getOperand(1));
+ case ISD::SRL:
+ return DAG.getNode(MSP430ISD::SRL, dl,
+ VT, N->getOperand(0), N->getOperand(1));
+ }
uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
@@ -648,6 +714,88 @@ SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
Chain, Dest, TargetCC, Flag);
}
+
+SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // If we are doing an AND and testing against zero, then the CMP
+ // will not be generated. The AND (or BIT) will generate the condition codes,
+ // but they are different from CMP.
+ bool andCC = false;
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ if (RHSC->isNullValue() && LHS.hasOneUse() &&
+ (LHS.getOpcode() == ISD::AND ||
+ (LHS.getOpcode() == ISD::TRUNCATE &&
+ LHS.getOperand(0).getOpcode() == ISD::AND))) {
+ andCC = true;
+ }
+ }
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SDValue TargetCC;
+ SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+  // Get the condition codes directly from the status register, if it's easy.
+ // Otherwise a branch will be generated. Note that the AND and BIT
+ // instructions generate different flags than CMP, the carry bit can be used
+ // for NE/EQ.
+ bool Invert = false;
+ bool Shift = false;
+ bool Convert = true;
+ switch (cast<ConstantSDNode>(TargetCC)->getZExtValue()) {
+ default:
+ Convert = false;
+ break;
+ case MSP430CC::COND_HS:
+ // Res = SRW & 1, no processing is required
+ break;
+ case MSP430CC::COND_LO:
+ // Res = ~(SRW & 1)
+ Invert = true;
+ break;
+ case MSP430CC::COND_NE:
+ if (andCC) {
+ // C = ~Z, thus Res = SRW & 1, no processing is required
+ } else {
+ // Res = (SRW >> 1) & 1
+ Shift = true;
+ }
+ break;
+ case MSP430CC::COND_E:
+ if (andCC) {
+ // C = ~Z, thus Res = ~(SRW & 1)
+ } else {
+ // Res = ~((SRW >> 1) & 1)
+ Shift = true;
+ }
+ Invert = true;
+ break;
+ }
+ EVT VT = Op.getValueType();
+ SDValue One = DAG.getConstant(1, VT);
+ if (Convert) {
+ SDValue SR = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::SRW,
+ MVT::i16, Flag);
+ if (Shift)
+ // FIXME: somewhere this is turned into a SRL, lower it MSP specific?
+ SR = DAG.getNode(ISD::SRA, dl, MVT::i16, SR, One);
+ SR = DAG.getNode(ISD::AND, dl, MVT::i16, SR, One);
+ if (Invert)
+ SR = DAG.getNode(ISD::XOR, dl, MVT::i16, SR, One);
+ return SR;
+ } else {
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(One);
+ Ops.push_back(Zero);
+ Ops.push_back(TargetCC);
+ Ops.push_back(Flag);
+ return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size());
+ }
+}
+
SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
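
When LowerSETCC above can read the result straight out of the status register, it copies SRW into a general register, optionally shifts it right by one, masks it to a single bit and optionally inverts it. A minimal C++ model of that Shift/Invert post-processing (my own sketch, not LLVM code; it assumes the MSP430 SR layout with carry in bit 0 and zero in bit 1):

#include <cstdint>

// Mirrors the DAG nodes built after the CopyFromReg of SRW:
// optional SRA by one, AND with one, optional XOR with one.
uint16_t boolFromSRW(uint16_t SRW, bool Shift, bool Invert) {
  uint16_t Res = SRW;
  if (Shift)
    Res >>= 1;   // look at bit 1 (Z) instead of bit 0 (C)
  Res &= 1;
  if (Invert)
    Res ^= 1;
  return Res;
}
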
@@ -682,6 +830,55 @@ SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op,
DAG.getValueType(Val.getValueType()));
}
+SDValue MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
+ int ReturnAddrIndex = FuncInfo->getRAIndex();
+
+ if (ReturnAddrIndex == 0) {
+ // Set up a frame object for the return address.
+ uint64_t SlotSize = TD->getPointerSize();
+ ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
+ true, false);
+ FuncInfo->setRAIndex(ReturnAddrIndex);
+ }
+
+ return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
+}
+
+SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Depth > 0) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset =
+ DAG.getConstant(TD->getPointerSize(), MVT::i16);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FrameAddr, Offset),
+ NULL, 0);
+ }
+
+ // Just load the return address.
+ SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ RetAddrFI, NULL, 0);
+}
+
+SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ MSP430::FPW, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0);
+ return FrameAddr;
+}
+
/// getPostIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if this node can be
/// combined with a load / store to form a post-indexed load / store.
@@ -722,6 +919,7 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return NULL;
case MSP430ISD::RET_FLAG: return "MSP430ISD::RET_FLAG";
+ case MSP430ISD::RETI_FLAG: return "MSP430ISD::RETI_FLAG";
case MSP430ISD::RRA: return "MSP430ISD::RRA";
case MSP430ISD::RLA: return "MSP430ISD::RLA";
case MSP430ISD::RRC: return "MSP430ISD::RRC";
@@ -730,6 +928,8 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
case MSP430ISD::BR_CC: return "MSP430ISD::BR_CC";
case MSP430ISD::CMP: return "MSP430ISD::CMP";
case MSP430ISD::SELECT_CC: return "MSP430ISD::SELECT_CC";
+ case MSP430ISD::SHL: return "MSP430ISD::SHL";
+ case MSP430ISD::SRA: return "MSP430ISD::SRA";
}
}
@@ -738,13 +938,131 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
//===----------------------------------------------------------------------===//
MachineBasicBlock*
+MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ MachineFunction *F = BB->getParent();
+ MachineRegisterInfo &RI = F->getRegInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+
+ unsigned Opc;
+ const TargetRegisterClass * RC;
+ switch (MI->getOpcode()) {
+ default:
+ assert(0 && "Invalid shift opcode!");
+ case MSP430::Shl8:
+ Opc = MSP430::SHL8r1;
+ RC = MSP430::GR8RegisterClass;
+ break;
+ case MSP430::Shl16:
+ Opc = MSP430::SHL16r1;
+ RC = MSP430::GR16RegisterClass;
+ break;
+ case MSP430::Sra8:
+ Opc = MSP430::SAR8r1;
+ RC = MSP430::GR8RegisterClass;
+ break;
+ case MSP430::Sra16:
+ Opc = MSP430::SAR16r1;
+ RC = MSP430::GR16RegisterClass;
+ break;
+ case MSP430::Srl8:
+ Opc = MSP430::SAR8r1c;
+ RC = MSP430::GR8RegisterClass;
+ break;
+ case MSP430::Srl16:
+ Opc = MSP430::SAR16r1c;
+ RC = MSP430::GR16RegisterClass;
+ break;
+ }
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator I = BB;
+ ++I;
+
+ // Create loop block
+ MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(I, LoopBB);
+ F->insert(I, RemBB);
+
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the block containing instructions after shift.
+ RemBB->transferSuccessors(BB);
+
+ // Inform sdisel of the edge changes.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ EM->insert(std::make_pair(*SI, RemBB));
+
+  // Add edges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB
+ BB->addSuccessor(LoopBB);
+ BB->addSuccessor(RemBB);
+ LoopBB->addSuccessor(RemBB);
+ LoopBB->addSuccessor(LoopBB);
+
+ unsigned ShiftAmtReg = RI.createVirtualRegister(MSP430::GR8RegisterClass);
+ unsigned ShiftAmtReg2 = RI.createVirtualRegister(MSP430::GR8RegisterClass);
+ unsigned ShiftReg = RI.createVirtualRegister(RC);
+ unsigned ShiftReg2 = RI.createVirtualRegister(RC);
+ unsigned ShiftAmtSrcReg = MI->getOperand(2).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // BB:
+ // cmp 0, N
+ // je RemBB
+ BuildMI(BB, dl, TII.get(MSP430::CMP8ir))
+ .addImm(0).addReg(ShiftAmtSrcReg);
+ BuildMI(BB, dl, TII.get(MSP430::JCC))
+ .addMBB(RemBB)
+ .addImm(MSP430CC::COND_E);
+
+ // LoopBB:
+ // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
+ // ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB]
+ // ShiftReg2 = shift ShiftReg
+ // ShiftAmt2 = ShiftAmt - 1;
+ BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftReg)
+ .addReg(SrcReg).addMBB(BB)
+ .addReg(ShiftReg2).addMBB(LoopBB);
+ BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftAmtReg)
+ .addReg(ShiftAmtSrcReg).addMBB(BB)
+ .addReg(ShiftAmtReg2).addMBB(LoopBB);
+ BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2)
+ .addReg(ShiftReg);
+ BuildMI(LoopBB, dl, TII.get(MSP430::SUB8ri), ShiftAmtReg2)
+ .addReg(ShiftAmtReg).addImm(1);
+ BuildMI(LoopBB, dl, TII.get(MSP430::JCC))
+ .addMBB(LoopBB)
+ .addImm(MSP430CC::COND_NE);
+
+ // RemBB:
+  //   DestReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
+ BuildMI(RemBB, dl, TII.get(MSP430::PHI), DstReg)
+ .addReg(SrcReg).addMBB(BB)
+ .addReg(ShiftReg2).addMBB(LoopBB);
+
+ return RemBB;
+}
+
+MachineBasicBlock*
MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB,
DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ unsigned Opc = MI->getOpcode();
+
+ if (Opc == MSP430::Shl8 || Opc == MSP430::Shl16 ||
+ Opc == MSP430::Sra8 || Opc == MSP430::Sra16 ||
+ Opc == MSP430::Srl8 || Opc == MSP430::Srl16)
+ return EmitShiftInstr(MI, BB, EM);
+
const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
- assert((MI->getOpcode() == MSP430::Select16 ||
- MI->getOpcode() == MSP430::Select8) &&
+
+ assert((Opc == MSP430::Select16 || Opc == MSP430::Select8) &&
"Unexpected instr type to insert");
// To "insert" a SELECT instruction, we actually have to insert the diamond
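
EmitShiftInstr above expands the new Shl/Sra/Srl pseudos into a three-block diamond: the original block tests the shift amount against zero, LoopBB shifts one bit per iteration, and RemBB merges the result with a PHI. The control flow, written out as ordinary C++ for one of the six cases (a sketch of the generated structure, not LLVM code):

#include <cstdint>

uint16_t shl16_loop(uint16_t src, uint8_t cnt) {
  if (cnt == 0)          // BB: CMP8ir 0, cnt ; JCC COND_E -> RemBB
    return src;          // RemBB PHI takes the unshifted source from BB
  uint16_t val = src;    // ShiftReg
  uint8_t amt = cnt;     // ShiftAmtReg
  do {                   // LoopBB
    val <<= 1;           //   SHL16r1
    amt -= 1;            //   SUB8ri
  } while (amt != 0);    //   JCC COND_NE -> LoopBB
  return val;            // RemBB PHI takes ShiftReg2 from LoopBB
}
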
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index d413ccb..4921500 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -27,6 +27,9 @@ namespace llvm {
/// Return with a flag operand. Operand 0 is the chain operand.
RET_FLAG,
+ /// Same as RET_FLAG, but used for returning from ISRs.
+ RETI_FLAG,
+
/// Y = R{R,L}A X, rotate right (left) arithmetically
RRA, RLA,
@@ -44,7 +47,7 @@ namespace llvm {
/// CMP - Compare instruction.
CMP,
- /// SetCC. Operand 0 is condition code, and operand 1 is the flag
+ /// SetCC - Operand 0 is condition code, and operand 1 is the flag
/// operand produced by a CMP instruction.
SETCC,
@@ -54,9 +57,12 @@ namespace llvm {
/// instruction.
BR_CC,
- /// SELECT_CC. Operand 0 and operand 1 are selection variable, operand 3
+ /// SELECT_CC - Operand 0 and operand 1 are selection variable, operand 3
/// is condition code and operand 4 is flag operand.
- SELECT_CC
+ SELECT_CC,
+
+ /// SHL, SRA, SRL - Non-constant shifts.
+ SHL, SRA, SRL
};
}
@@ -81,8 +87,12 @@ namespace llvm {
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG);
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG);
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG);
TargetLowering::ConstraintType
getConstraintType(const std::string &Constraint) const;
@@ -92,6 +102,9 @@ namespace llvm {
MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB,
DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+ MachineBasicBlock* EmitShiftInstr(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
private:
SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
@@ -144,6 +157,7 @@ namespace llvm {
const MSP430Subtarget &Subtarget;
const MSP430TargetMachine &TM;
+ const TargetData *TD;
};
} // namespace llvm
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index b2f09c7..2ae6759 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -219,17 +219,6 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return false;
}
-bool MSP430InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB)const{
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case MSP430::RET: // Return.
- case MSP430::JMP: // Uncond branch.
- return true;
- default: return false;
- }
-}
-
bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
const TargetInstrDesc &TID = MI->getDesc();
if (!TID.isTerminator()) return false;
@@ -270,8 +259,8 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
}
// If the block has any instructions after a JMP, delete them.
- while (next(I) != MBB.end())
- next(I)->eraseFromParent();
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
Cond.clear();
FBB = 0;
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index 35e35db..e4ceeb9 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -61,7 +61,6 @@ public:
// Branch folding goodness
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
- bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
bool isUnpredicatedTerminator(const MachineInstr *MI) const;
bool AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 7a26f6c..d67ba90 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -31,12 +31,15 @@ def SDT_MSP430BrCC : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>,
SDTCisVT<1, i8>]>;
def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
SDTCisVT<3, i8>]>;
+def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisI8<2>]>;
//===----------------------------------------------------------------------===//
// MSP430 Specific Node Definitions.
//===----------------------------------------------------------------------===//
-def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInFlag]>;
+def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
+def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
@@ -54,6 +57,9 @@ def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>;
def MSP430cmp : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutFlag]>;
def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC, [SDNPHasChain, SDNPInFlag]>;
def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC, [SDNPInFlag]>;
+def MSP430shl : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>;
+def MSP430sra : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>;
+def MSP430srl : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>;
//===----------------------------------------------------------------------===//
// MSP430 Operand Definitions.
@@ -90,7 +96,9 @@ def addr : ComplexPattern<iPTR, 2, "SelectAddr", [], []>;
// Pattern Fragments
def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 ( extloadi8 node:$ptr))>;
-
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
//===----------------------------------------------------------------------===//
// Instruction list..
@@ -117,6 +125,27 @@ let usesCustomInserter = 1 in {
"# Select16 PSEUDO",
[(set GR16:$dst,
(MSP430selectcc GR16:$src1, GR16:$src2, imm:$cc))]>;
+ let Defs = [SRW] in {
+ def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+ "# Shl8 PSEUDO",
+ [(set GR8:$dst, (MSP430shl GR8:$src, GR8:$cnt))]>;
+ def Shl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+ "# Shl16 PSEUDO",
+ [(set GR16:$dst, (MSP430shl GR16:$src, GR8:$cnt))]>;
+ def Sra8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+ "# Sra8 PSEUDO",
+ [(set GR8:$dst, (MSP430sra GR8:$src, GR8:$cnt))]>;
+ def Sra16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+ "# Sra16 PSEUDO",
+ [(set GR16:$dst, (MSP430sra GR16:$src, GR8:$cnt))]>;
+ def Srl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+ "# Srl8 PSEUDO",
+ [(set GR8:$dst, (MSP430srl GR8:$src, GR8:$cnt))]>;
+ def Srl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+ "# Srl16 PSEUDO",
+ [(set GR16:$dst, (MSP430srl GR16:$src, GR8:$cnt))]>;
+
+ }
}
let neverHasSideEffects = 1 in
@@ -128,7 +157,8 @@ def NOP : Pseudo<(outs), (ins), "nop", []>;
// FIXME: Provide proper encoding!
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
- def RET : Pseudo<(outs), (ins), "ret", [(MSP430retflag)]>;
+ def RET : Pseudo<(outs), (ins), "ret", [(MSP430retflag)]>;
+ def RETI : Pseudo<(outs), (ins), "reti", [(MSP430retiflag)]>;
}
let isBranch = 1, isTerminator = 1 in {
@@ -823,6 +853,65 @@ def CMP16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2),
"cmp.w\t{$src1, $src2}",
[(MSP430cmp (load addr:$src1), GR16:$src2), (implicit SRW)]>;
+
+// BIT TESTS, just sets condition codes
+// Note that the C condition is set differently than when using CMP.
+let isCommutable = 1 in {
+def BIT8rr : Pseudo<(outs), (ins GR8:$src1, GR8:$src2),
+ "bit.b\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su GR8:$src1, GR8:$src2)),
+ (implicit SRW)]>;
+def BIT16rr : Pseudo<(outs), (ins GR16:$src1, GR16:$src2),
+ "bit.w\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su GR16:$src1, GR16:$src2)),
+ (implicit SRW)]>;
+}
+def BIT8ri : Pseudo<(outs), (ins GR8:$src1, i8imm:$src2),
+ "bit.b\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su GR8:$src1, imm:$src2)),
+ (implicit SRW)]>;
+def BIT16ri : Pseudo<(outs), (ins GR16:$src1, i16imm:$src2),
+ "bit.w\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su GR16:$src1, imm:$src2)),
+ (implicit SRW)]>;
+
+def BIT8rm : Pseudo<(outs), (ins GR8:$src1, memdst:$src2),
+ "bit.b\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su GR8:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+def BIT16rm : Pseudo<(outs), (ins GR16:$src1, memdst:$src2),
+ "bit.w\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su GR16:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+
+def BIT8mr : Pseudo<(outs), (ins memsrc:$src1, GR8:$src2),
+ "bit.b\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su (load addr:$src1), GR8:$src2)),
+ (implicit SRW)]>;
+def BIT16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2),
+ "bit.w\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su (load addr:$src1), GR16:$src2)),
+ (implicit SRW)]>;
+
+def BIT8mi : Pseudo<(outs), (ins memsrc:$src1, i8imm:$src2),
+ "bit.b\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su (load addr:$src1), (i8 imm:$src2))),
+ (implicit SRW)]>;
+def BIT16mi : Pseudo<(outs), (ins memsrc:$src1, i16imm:$src2),
+ "bit.w\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su (load addr:$src1), (i16 imm:$src2))),
+ (implicit SRW)]>;
+
+def BIT8mm : Pseudo<(outs), (ins memsrc:$src1, memsrc:$src2),
+ "bit.b\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su (i8 (load addr:$src1)),
+ (load addr:$src2))),
+ (implicit SRW)]>;
+def BIT16mm : Pseudo<(outs), (ins memsrc:$src1, memsrc:$src2),
+ "bit.w\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su (i16 (load addr:$src1)),
+ (load addr:$src2))),
+ (implicit SRW)]>;
} // Defs = [SRW]
//===----------------------------------------------------------------------===//
@@ -905,3 +994,6 @@ def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
// peephole patterns
def : Pat<(and GR16:$src, 255), (ZEXT16r GR16:$src)>;
+def : Pat<(MSP430cmp 0, (trunc (and_su GR16:$src1, GR16:$src2))),
+ (BIT8rr (EXTRACT_SUBREG GR16:$src1, subreg_8bit),
+ (EXTRACT_SUBREG GR16:$src2, subreg_8bit))>;
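
The BIT* patterns select an AND whose only use is a compare against zero (the and_su fragment), so a flag-setting bit.b/bit.w can replace a materialized AND plus CMP. The kind of source the patterns are aimed at, as a small illustrative example (whether a given frontend and optimization pipeline produce exactly this DAG is an assumption):

// The (x & 0x10) result feeds only the comparison, so the whole test can
// become "bit.w #16, x" followed by a conditional branch/setcc on SRW.
int has_flag(unsigned int x) {
  return (x & 0x10) != 0;
}
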
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index 1d26ae3..383fd2e 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -25,14 +25,20 @@ class MSP430MachineFunctionInfo : public MachineFunctionInfo {
/// stack frame in bytes.
unsigned CalleeSavedFrameSize;
+ /// ReturnAddrIndex - FrameIndex for return slot.
+ int ReturnAddrIndex;
+
public:
MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {}
explicit MSP430MachineFunctionInfo(MachineFunction &MF)
- : CalleeSavedFrameSize(0) {}
+ : CalleeSavedFrameSize(0), ReturnAddrIndex(0) {}
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+
+ int getRAIndex() const { return ReturnAddrIndex; }
+ void setRAIndex(int Index) { ReturnAddrIndex = Index; }
};
} // End llvm namespace
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 92baad9..e85c7a2 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -17,6 +17,7 @@
#include "MSP430MachineFunctionInfo.h"
#include "MSP430RegisterInfo.h"
#include "MSP430TargetMachine.h"
+#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -37,17 +38,26 @@ MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm,
const unsigned*
MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const Function* F = MF->getFunction();
static const unsigned CalleeSavedRegs[] = {
MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
0
};
+ static const unsigned CalleeSavedRegsIntr[] = {
+ MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
+ 0
+ };
- return CalleeSavedRegs;
+ return (F->getCallingConv() == CallingConv::MSP430_INTR ?
+ CalleeSavedRegsIntr : CalleeSavedRegs);
}
const TargetRegisterClass *const *
MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ const Function* F = MF->getFunction();
static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
&MSP430::GR16RegClass, &MSP430::GR16RegClass,
&MSP430::GR16RegClass, &MSP430::GR16RegClass,
@@ -55,8 +65,18 @@ MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
&MSP430::GR16RegClass, &MSP430::GR16RegClass,
0
};
+ static const TargetRegisterClass * const CalleeSavedRegClassesIntr[] = {
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ 0
+ };
- return CalleeSavedRegClasses;
+ return (F->getCallingConv() == CallingConv::MSP430_INTR ?
+ CalleeSavedRegClassesIntr : CalleeSavedRegClasses);
}
BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
@@ -193,10 +213,10 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// We need to materialize the offset via add instruction.
unsigned DstReg = MI.getOperand(0).getReg();
if (Offset < 0)
- BuildMI(MBB, next(II), dl, TII.get(MSP430::SUB16ri), DstReg)
+ BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::SUB16ri), DstReg)
.addReg(DstReg).addImm(-Offset);
else
- BuildMI(MBB, next(II), dl, TII.get(MSP430::ADD16ri), DstReg)
+ BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::ADD16ri), DstReg)
.addReg(DstReg).addImm(Offset);
return 0;
@@ -251,7 +271,7 @@ void MSP430RegisterInfo::emitPrologue(MachineFunction &MF) const {
.addReg(MSP430::SPW);
// Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
I != E; ++I)
I->addLiveIn(MSP430::FPW);
@@ -292,7 +312,8 @@ void MSP430RegisterInfo::emitEpilogue(MachineFunction &MF,
DebugLoc DL = MBBI->getDebugLoc();
switch (RetOpcode) {
- case MSP430::RET: break; // These are ok
+ case MSP430::RET:
+ case MSP430::RETI: break; // These are ok
default:
llvm_unreachable("Can only insert epilog into returning blocks");
}
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 6d8e160..48b9bdf 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -590,22 +590,6 @@ RemoveBranch(MachineBasicBlock &MBB) const
return 2;
}
-/// BlockHasNoFallThrough - Analyze if MachineBasicBlock does not
-/// fall-through into its successor block.
-bool MipsInstrInfo::
-BlockHasNoFallThrough(const MachineBasicBlock &MBB) const
-{
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case Mips::RET: // Return.
- case Mips::JR: // Indirect branch.
- case Mips::J: // Uncond branch.
- return true;
- default: return false;
- }
-}
-
/// ReverseBranchCondition - Return the inverse opcode of the
/// specified Branch instruction.
bool MipsInstrInfo::
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 249d3de..ab8dc59 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -232,7 +232,6 @@ public:
return 0;
}
- virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 0083598..af7d812 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -740,18 +740,6 @@ bool PPCInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
}
-bool PPCInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case PPC::BLR: // Return.
- case PPC::B: // Uncond branch.
- case PPC::BCTR: // Indirect branch.
- return true;
- default: return false;
- }
-}
-
bool PPCInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index bb0dc15a..57facac 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -143,7 +143,6 @@ public:
virtual bool canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const;
- virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 2d8a687..e1772c2 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -801,8 +801,21 @@ void bar(unsigned n) {
true();
}
-I think this basically amounts to a dag combine to simplify comparisons against
-multiply hi's into a comparison against the mullo.
+This is equivalent to the following, where 2863311531 is the multiplicative
+inverse of 3, and 1431655766 is ((2^32)-1)/3+1:
+void bar(unsigned n) {
+ if (n * 2863311531U < 1431655766U)
+ true();
+}
+
+The same transformation can work with an even modulo with the addition of a
+rotate: rotate the result of the multiply to the right by the number of bits
+which need to be zero for the condition to be true, and shrink the compare RHS
+by the same amount. Unless the target supports rotates, though, that
+transformation probably isn't worthwhile.
+
+The transformation can also easily be made to work with non-zero equality
+comparisons: just transform, for example, "n % 3 == 1" to "(n-1) % 3 == 0".
//===---------------------------------------------------------------------===//
@@ -823,20 +836,6 @@ int main() {
//===---------------------------------------------------------------------===//
-Instcombine will merge comparisons like (x >= 10) && (x < 20) by producing (x -
-10) u< 10, but only when the comparisons have matching sign.
-
-This could be converted with a similiar technique. (PR1941)
-
-define i1 @test(i8 %x) {
- %A = icmp uge i8 %x, 5
- %B = icmp slt i8 %x, 20
- %C = and i1 %A, %B
- ret i1 %C
-}
-
-//===---------------------------------------------------------------------===//
-
These functions perform the same computation, but produce different assembly.
define i8 @select(i8 %x) readnone nounwind {
@@ -884,18 +883,6 @@ The expression should optimize to something like
//===---------------------------------------------------------------------===//
-From GCC Bug 15241:
-unsigned int
-foo (unsigned int a, unsigned int b)
-{
- if (a <= 7 && b <= 7)
- baz ();
-}
-Should combine to "(a|b) <= 7". Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
-
-//===---------------------------------------------------------------------===//
-
From GCC Bug 3756:
int
pn (int n)
@@ -907,19 +894,6 @@ Should combine to (n >> 31) | 1. Currently not optimized with "clang
//===---------------------------------------------------------------------===//
-From GCC Bug 28685:
-int test(int a, int b)
-{
- int lt = a < b;
- int eq = a == b;
-
- return (lt || eq);
-}
-Should combine to "a <= b". Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts | llc".
-
-//===---------------------------------------------------------------------===//
-
void a(int variable)
{
if (variable == 4 || variable == 6)
@@ -993,12 +967,6 @@ Should combine to 0. Currently not optimized with "clang
//===---------------------------------------------------------------------===//
-int a(unsigned char* b) {return *b > 99;}
-There's an unnecessary zext in the generated code with "clang
--emit-llvm-bc | opt -std-compile-opts".
-
-//===---------------------------------------------------------------------===//
-
int a(unsigned b) {return ((b << 31) | (b << 30)) >> 31;}
Should be combined to "((b >> 1) | b) & 1". Currently not optimized
with "clang -emit-llvm-bc | opt -std-compile-opts".
@@ -1011,12 +979,6 @@ Should combine to "x | (y & 3)". Currently not optimized with "clang
//===---------------------------------------------------------------------===//
-unsigned a(unsigned a) {return ((a | 1) & 3) | (a & -4);}
-Should combine to "a | 1". Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
-
-//===---------------------------------------------------------------------===//
-
int a(int a, int b, int c) {return (~a & c) | ((c|a) & b);}
Should fold to "(~a & c) | (a & b)". Currently not optimized with
"clang -emit-llvm-bc | opt -std-compile-opts".
@@ -1704,3 +1666,50 @@ need all but the bottom two bits from %A, and if we gave that mask to SDB it
would delete the or instruction for us.
//===---------------------------------------------------------------------===//
+
+FunctionAttrs is not marking this function as readnone (just readonly):
+$ clang t.c -emit-llvm -S -o - -O0 | opt -mem2reg -S -functionattrs
+
+int t(int a, int b, int c) {
+ int *p;
+ if (a)
+ p = &a;
+ else
+ p = &c;
+ return *p;
+}
+
+This is because we codegen this to:
+
+define i32 @t(i32 %a, i32 %b, i32 %c) nounwind readonly ssp {
+entry:
+ %a.addr = alloca i32 ; <i32*> [#uses=3]
+ %c.addr = alloca i32 ; <i32*> [#uses=2]
+...
+
+if.end:
+ %p.0 = phi i32* [ %a.addr, %if.then ], [ %c.addr, %if.else ]
+ %tmp2 = load i32* %p.0 ; <i32> [#uses=1]
+ ret i32 %tmp2
+}
+
+And functionattrs doesn't realize that the p.0 load points to function local
+memory.
+
+Also, functionattrs doesn't know about memcpy/memset. This function should be
+marked readnone, since it only twiddles local memory, but functionattrs doesn't
+handle memset/memcpy/memmove aggressively:
+
+struct X { int *p; int *q; };
+int foo() {
+ int i = 0, j = 1;
+ struct X x, y;
+ int **p;
+ y.p = &i;
+ x.q = &j;
+ p = __builtin_memcpy (&x, &y, sizeof (int *));
+ return **p;
+}
+
+//===---------------------------------------------------------------------===//
+
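+A quick brute-force check of the divisibility rewrite described above (the
+constants are taken directly from the text; the loop bound is arbitrary):
+
+#include <cassert>
+
+int main() {
+  // n % 3 == 0 exactly when n * inverse-of-3 wraps into [0, 1431655766).
+  for (unsigned n = 0; n != 3000000; ++n)
+    assert((n % 3 == 0) == (n * 2863311531u < 1431655766u));
+  return 0;
+}
+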
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index d82d928..5fa7e8c 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -402,18 +402,6 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return false;
}
-bool SystemZInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB)const{
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case SystemZ::RET: // Return.
- case SystemZ::JMP: // Uncond branch.
- case SystemZ::JMPr: // Indirect branch.
- return true;
- default: return false;
- }
-}
-
bool SystemZInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
const TargetInstrDesc &TID = MI->getDesc();
if (!TID.isTerminator()) return false;
@@ -454,8 +442,8 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
}
// If the block has any instructions after a JMP, delete them.
- while (next(I) != MBB.end())
- next(I)->eraseFromParent();
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
Cond.clear();
FBB = 0;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index e16d704..ef3b39e 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -89,7 +89,6 @@ public:
const std::vector<CalleeSavedInfo> &CSI) const;
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
- virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
virtual bool AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 4d1c01f..1318195 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -247,7 +247,7 @@ void SystemZRegisterInfo::emitPrologue(MachineFunction &MF) const {
.addReg(SystemZ::R15D);
// Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
I != E; ++I)
I->addLiveIn(SystemZ::R11D);
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index fc71bc3..9434a19 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -117,14 +117,6 @@ TargetAlignElem::operator==(const TargetAlignElem &rhs) const {
&& TypeBitWidth == rhs.TypeBitWidth);
}
-std::ostream &
-TargetAlignElem::dump(std::ostream &os) const {
- return os << AlignType
- << TypeBitWidth
- << ":" << (int) (ABIAlign * 8)
- << ":" << (int) (PrefAlign * 8);
-}
-
const TargetAlignElem TargetData::InvalidAlignmentElem =
TargetAlignElem::get((AlignTypeEnum) -1, 0, 0, 0);
@@ -323,11 +315,10 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
: Alignments[BestMatchIdx].PrefAlign;
}
-typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy;
-
-namespace llvm {
+namespace {
class StructLayoutMap : public AbstractTypeUser {
+ typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy;
LayoutInfoTy LayoutInfo;
/// refineAbstractType - The callback method invoked when an abstract type is
@@ -336,19 +327,11 @@ class StructLayoutMap : public AbstractTypeUser {
///
virtual void refineAbstractType(const DerivedType *OldTy,
const Type *) {
- const StructType *STy = dyn_cast<const StructType>(OldTy);
- if (!STy) {
- OldTy->removeAbstractTypeUser(this);
- return;
- }
-
- StructLayout *SL = LayoutInfo[STy];
- if (SL) {
- SL->~StructLayout();
- free(SL);
- LayoutInfo[STy] = NULL;
- }
-
+ const StructType *STy = cast<const StructType>(OldTy);
+ LayoutInfoTy::iterator Iter = LayoutInfo.find(STy);
+ Iter->second->~StructLayout();
+ free(Iter->second);
+ LayoutInfo.erase(Iter);
OldTy->removeAbstractTypeUser(this);
}
@@ -358,70 +341,43 @@ class StructLayoutMap : public AbstractTypeUser {
/// This method notifies ATU's when this occurs for a type.
///
virtual void typeBecameConcrete(const DerivedType *AbsTy) {
- const StructType *STy = dyn_cast<const StructType>(AbsTy);
- if (!STy) {
- AbsTy->removeAbstractTypeUser(this);
- return;
- }
-
- StructLayout *SL = LayoutInfo[STy];
- if (SL) {
- SL->~StructLayout();
- free(SL);
- LayoutInfo[STy] = NULL;
- }
-
+ const StructType *STy = cast<const StructType>(AbsTy);
+ LayoutInfoTy::iterator Iter = LayoutInfo.find(STy);
+ Iter->second->~StructLayout();
+ free(Iter->second);
+ LayoutInfo.erase(Iter);
AbsTy->removeAbstractTypeUser(this);
}
- bool insert(const Type *Ty) {
- if (Ty->isAbstract())
- Ty->addAbstractTypeUser(this);
- return true;
- }
-
public:
virtual ~StructLayoutMap() {
// Remove any layouts.
for (LayoutInfoTy::iterator
- I = LayoutInfo.begin(), E = LayoutInfo.end(); I != E; ++I)
- if (StructLayout *SL = I->second) {
- SL->~StructLayout();
- free(SL);
- }
- }
+ I = LayoutInfo.begin(), E = LayoutInfo.end(); I != E; ++I) {
+ const Type *Key = I->first;
+ StructLayout *Value = I->second;
- inline LayoutInfoTy::iterator begin() {
- return LayoutInfo.begin();
- }
- inline LayoutInfoTy::iterator end() {
- return LayoutInfo.end();
- }
- inline LayoutInfoTy::const_iterator begin() const {
- return LayoutInfo.begin();
- }
- inline LayoutInfoTy::const_iterator end() const {
- return LayoutInfo.end();
- }
+ if (Key->isAbstract())
+ Key->removeAbstractTypeUser(this);
- LayoutInfoTy::iterator find(const StructType *&Val) {
- return LayoutInfo.find(Val);
- }
- LayoutInfoTy::const_iterator find(const StructType *&Val) const {
- return LayoutInfo.find(Val);
+ Value->~StructLayout();
+ free(Value);
+ }
}
- bool erase(const StructType *&Val) {
- return LayoutInfo.erase(Val);
- }
- bool erase(LayoutInfoTy::iterator I) {
- return LayoutInfo.erase(I);
+ void InvalidateEntry(const StructType *Ty) {
+ LayoutInfoTy::iterator I = LayoutInfo.find(Ty);
+ if (I == LayoutInfo.end()) return;
+
+ I->second->~StructLayout();
+ free(I->second);
+ LayoutInfo.erase(I);
+
+ if (Ty->isAbstract())
+ Ty->removeAbstractTypeUser(this);
}
- StructLayout *&operator[](const Type *Key) {
- const StructType *STy = dyn_cast<const StructType>(Key);
- assert(STy && "Trying to access the struct layout map with a non-struct!");
- insert(STy);
+ StructLayout *&operator[](const StructType *STy) {
return LayoutInfo[STy];
}
@@ -429,17 +385,18 @@ public:
virtual void dump() const {}
};
-} // end namespace llvm
+} // end anonymous namespace
TargetData::~TargetData() {
- delete LayoutMap;
+ delete static_cast<StructLayoutMap*>(LayoutMap);
}
const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
if (!LayoutMap)
LayoutMap = new StructLayoutMap();
- StructLayout *&SL = (*LayoutMap)[Ty];
+ StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap);
+ StructLayout *&SL = (*STM)[Ty];
if (SL) return SL;
// Otherwise, create the struct layout. Because it is variable length, we
@@ -453,6 +410,10 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
SL = L;
new (L) StructLayout(Ty, *this);
+
+ if (Ty->isAbstract())
+ Ty->addAbstractTypeUser(STM);
+
return L;
}
@@ -463,15 +424,10 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const {
if (!LayoutMap) return; // No cache.
- DenseMap<const StructType*, StructLayout*>::iterator I = LayoutMap->find(Ty);
- if (I == LayoutMap->end()) return;
-
- I->second->~StructLayout();
- free(I->second);
- LayoutMap->erase(I);
+ StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap);
+ STM->InvalidateEntry(Ty);
}
-
std::string TargetData::getStringRepresentation() const {
std::string Result;
raw_string_ostream OS(Result);
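
The explicit "~StructLayout(); free(...)" pairs above exist because StructLayout is a variable-length object: getStructLayout() malloc's space for the trailing member offsets and constructs it with placement new, so it can never go through plain delete. A stripped-down sketch of that allocation pattern (my own stand-in type, not the LLVM class):

#include <cstdlib>
#include <cstdint>
#include <new>

struct Layout {
  unsigned NumElements;
  uint64_t MemberOffsets[1];   // really NumElements entries follow
};

Layout *createLayout(unsigned N) {
  void *Mem = std::malloc(sizeof(Layout) + (N - 1) * sizeof(uint64_t));
  Layout *L = new (Mem) Layout;          // placement new, as in getStructLayout()
  L->NumElements = N;
  return L;
}

void destroyLayout(Layout *L) {
  L->~Layout();                          // run the destructor explicitly...
  std::free(L);                          // ...then hand the raw memory back
}
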
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index a167118..684c61f 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -62,11 +62,6 @@ MCCodeEmitter *createX86MCCodeEmitter(const Target &, TargetMachine &TM);
///
FunctionPass *createEmitX86CodeToMemory();
-/// createX86MaxStackAlignmentCalculatorPass - This function returns a pass
-/// which calculates maximal stack alignment required for function
-///
-FunctionPass *createX86MaxStackAlignmentCalculatorPass();
-
extern Target TheX86_32Target, TheX86_64Target;
} // End llvm namespace
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index afd5525..5017af2 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/ADT/StringSet.h"
+#include "X86MachineFunctionInfo.h"
namespace llvm {
class X86MachineFunctionInfo;
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index d77f039..12d3d04 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -64,11 +64,18 @@ def RetCC_X86_32_C : CallingConv<[
// X86-32 FastCC return-value convention.
def RetCC_X86_32_Fast : CallingConv<[
// The X86-32 fastcc returns 1, 2, or 3 FP values in XMM0-2 if the target has
- // SSE2, otherwise it is the the C calling conventions.
+ // SSE2.
// This can happen when a float, 2 x float, or 3 x float vector is split by
// target lowering, and is returned in 1-3 sse regs.
CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+
+ // For integers, ECX can be used as an extra return register
+ CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>,
+ CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>,
+ CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>,
+
+ // Otherwise, it is the same as the common X86 calling convention.
CCDelegateTo<RetCC_X86Common>
]>;
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index a2fe9b0..044bd4b 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -289,7 +289,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
while (Start != BB.begin() && prior(Start) != PrevI) --Start;
errs() << "Inserted instructions:\n\t";
Start->print(errs(), &MF.getTarget());
- while (++Start != next(I)) {}
+ while (++Start != llvm::next(I)) {}
}
dumpStack();
);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d80b8ec..0517b56 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -595,6 +595,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::TRUNCATE, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+ setTruncStoreAction((MVT::SimpleValueType)VT,
+ (MVT::SimpleValueType)InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
}
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
@@ -671,8 +683,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom);
- setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand);
- setOperationAction(ISD::TRUNCATE, MVT::v8i8, Expand);
setOperationAction(ISD::SELECT, MVT::v8i8, Promote);
setOperationAction(ISD::SELECT, MVT::v4i16, Promote);
setOperationAction(ISD::SELECT, MVT::v2i32, Promote);
@@ -3344,6 +3354,82 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
}
SDValue
+X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+ SelectionDAG &DAG) {
+
+ // Check if the scalar load can be widened into a vector load. And if
+ // the address is "base + cst" see if the cst can be "absorbed" into
+ // the shuffle mask.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
+ SDValue Ptr = LD->getBasePtr();
+ if (!ISD::isNormalLoad(LD) || LD->isVolatile())
+ return SDValue();
+ EVT PVT = LD->getValueType(0);
+ if (PVT != MVT::i32 && PVT != MVT::f32)
+ return SDValue();
+
+ int FI = -1;
+ int64_t Offset = 0;
+ if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FI = FINode->getIndex();
+ Offset = 0;
+ } else if (Ptr.getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ Offset = Ptr.getConstantOperandVal(1);
+ Ptr = Ptr.getOperand(0);
+ } else {
+ return SDValue();
+ }
+
+ SDValue Chain = LD->getChain();
+ // Make sure the stack object alignment is at least 16.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ if (DAG.InferPtrAlignment(Ptr) < 16) {
+ if (MFI->isFixedObjectIndex(FI)) {
+ // Can't change the alignment. Reference stack + offset explicitly
+ // if stack pointer is at least 16-byte aligned.
+ unsigned StackAlign = Subtarget->getStackAlignment();
+ if (StackAlign < 16)
+ return SDValue();
+ Offset = MFI->getObjectOffset(FI) + Offset;
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
+ getPointerTy());
+ Ptr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+ DAG.getConstant(Offset & ~15, getPointerTy()));
+ Offset %= 16;
+ } else {
+ MFI->setObjectAlignment(FI, 16);
+ }
+ }
+
+ // (Offset % 16) must be a multiple of 4. The address is then
+ // Ptr + (Offset & ~15).
+ if (Offset < 0)
+ return SDValue();
+ if ((Offset % 16) & 3)
+ return SDValue();
+ int64_t StartOffset = Offset & ~15;
+ if (StartOffset)
+ Ptr = DAG.getNode(ISD::ADD, Ptr.getDebugLoc(), Ptr.getValueType(),
+ Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
+
+ int EltNo = (Offset - StartOffset) >> 2;
+ int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
+ EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
+ SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0);
+ // Canonicalize it to a v4i32 shuffle.
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getVectorShuffle(MVT::v4i32, dl, V1,
+ DAG.getUNDEF(MVT::v4i32), &Mask[0]));
+ }
+
+ return SDValue();
+}
+
+SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
// All zero's are handled with pxor, all one's are handled with pcmpeqd.
@@ -3486,8 +3572,19 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
- if (Values.size() == 1)
+ if (Values.size() == 1) {
+ if (EVTBits == 32) {
+ // Instead of a shuffle like this:
+ // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
+ // Check if it's possible to issue this instead.
+ // shuffle (vload ptr), undef, <1, 1, 1, 1>
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue Item = Op.getOperand(Idx);
+ if (Op.getNode()->isOnlyUserOf(Item.getNode()))
+ return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
+ }
return SDValue();
+ }
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
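A hedged illustration of the transformation described in the comment above, written with SSE intrinsics rather than the DAG nodes the lowering actually builds: when the source is 16-byte aligned, splatting p[1] can be done with one full-width vector load plus a lane-1 shuffle instead of a scalar load of p[1] followed by a lane-0 splat.

#include <xmmintrin.h>
#include <cassert>

int main() {
  alignas(16) float p[4] = {1.0f, 2.0f, 3.0f, 4.0f};

  __m128 scalarSplat = _mm_set1_ps(p[1]);             // load (ptr + 4), splat lane 0
  __m128 wide        = _mm_load_ps(p);                // one aligned vector load
  __m128 vectorSplat = _mm_shuffle_ps(wide, wide,
                                      _MM_SHUFFLE(1, 1, 1, 1)); // splat lane 1

  alignas(16) float a[4], b[4];
  _mm_store_ps(a, scalarSplat);
  _mm_store_ps(b, vectorSplat);
  for (int i = 0; i < 4; ++i)
    assert(a[i] == b[i]);                             // both splats agree
  return 0;
}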
@@ -4278,7 +4375,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
unsigned ShAmt = 0;
SDValue ShVal;
bool isShift = getSubtarget()->hasSSE2() &&
- isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+ isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
@@ -4815,6 +4912,7 @@ static SDValue
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
DebugLoc dl = GA->getDebugLoc();
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
@@ -4828,6 +4926,10 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
SDValue Ops[] = { Chain, TGA };
Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2);
}
+
+ // TLSADDR will be codegen'ed as a call. Inform MFI that the function has calls.
+ MFI->setHasCalls(true);
+
SDValue Flag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
}
@@ -5648,6 +5750,17 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
return SDValue();
SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG);
+
+ // Use sbb x, x to materialize carry bit into a GPR.
+ // FIXME: Temporarily disabled since it breaks self-hosting. It's apparently
+ // miscompiling ARMISelDAGToDAG.cpp.
+ if (0 && !isFP && X86CC == X86::COND_B) {
+ return DAG.getNode(ISD::AND, dl, MVT::i8,
+ DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), Cond),
+ DAG.getConstant(1, MVT::i8));
+ }
+
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(X86CC, MVT::i8), Cond);
}
@@ -5800,9 +5913,18 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
Cond = NewCond;
}
+ // Look past (and (setcc_carry (cmp ...)), 1).
+ if (Cond.getOpcode() == ISD::AND &&
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (C && C->getAPIntValue() == 1)
+ Cond = Cond.getOperand(0);
+ }
+
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
- if (Cond.getOpcode() == X86ISD::SETCC) {
+ if (Cond.getOpcode() == X86ISD::SETCC ||
+ Cond.getOpcode() == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
@@ -5885,9 +6007,18 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
Cond = LowerXALUO(Cond, DAG);
#endif
+ // Look past (and (setcc_carry (cmp ...)), 1).
+ if (Cond.getOpcode() == ISD::AND &&
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (C && C->getAPIntValue() == 1)
+ Cond = Cond.getOperand(0);
+ }
+
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
- if (Cond.getOpcode() == X86ISD::SETCC) {
+ if (Cond.getOpcode() == X86ISD::SETCC ||
+ Cond.getOpcode() == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
@@ -7274,6 +7405,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::COMI: return "X86ISD::COMI";
case X86ISD::UCOMI: return "X86ISD::UCOMI";
case X86ISD::SETCC: return "X86ISD::SETCC";
+ case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
case X86ISD::CMOV: return "X86ISD::CMOV";
case X86ISD::BRCOND: return "X86ISD::BRCOND";
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
@@ -8327,16 +8459,6 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
-static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
- const TargetLowering &TLI) {
- GlobalValue *GV;
- int64_t Offset = 0;
- if (TLI.isGAPlusOffset(Base, GV, Offset))
- return (GV->getAlignment() >= N && (Offset % N) == 0);
- // DAG combine handles the stack object case.
- return false;
-}
-
static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
EVT EltVT, LoadSDNode *&LDBase,
unsigned &LastLoadedElt,
@@ -8366,7 +8488,7 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
continue;
LoadSDNode *LD = cast<LoadSDNode>(Elt);
- if (!TLI.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i, MFI))
+ if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
return false;
LastLoadedElt = i;
}
@@ -8399,7 +8521,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
if (LastLoadedElt == NumElems - 1) {
- if (isBaseAlignmentOfN(16, LD->getBasePtr().getNode(), TLI))
+ if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16)
return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
LD->getSrcValue(), LD->getSrcValueOffset(),
LD->isVolatile());
@@ -8858,11 +8980,42 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
+ // since the result of setcc_c is all zeros or all ones.
+ if (N1C && N0.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == X86ISD::SETCC_CARRY ||
+ ((N00.getOpcode() == ISD::ANY_EXTEND ||
+ N00.getOpcode() == ISD::ZERO_EXTEND) &&
+ N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) {
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ APInt ShAmt = N1C->getAPIntValue();
+ Mask = Mask.shl(ShAmt);
+ if (Mask != 0)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ N00, DAG.getConstant(Mask, VT));
+ }
+ }
+
+ return SDValue();
+}
/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
/// when possible.
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector() && VT.isInteger() &&
+ N->getOpcode() == ISD::SHL)
+ return PerformSHLCombine(N, DAG);
+
// On X86 with SSE2 support, we can transform this to a vector shift if
// all elements are shifted by the same amount. We can't do this in legalize
// because the a constant vector is typically transformed to a constant pool
@@ -8870,7 +9023,6 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
if (!Subtarget->hasSSE2())
return SDValue();
- EVT VT = N->getValueType(0);
if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
return SDValue();
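The PerformSHLCombine fold added above rests on a small identity: when x is either all zeros or all ones (the only values SETCC_CARRY produces), masking and then shifting gives the same bits as masking with the pre-shifted constant. A quick standalone check of that identity, not LLVM code:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t values[] = {0u, 0xFFFFFFFFu};        // possible setcc_carry results
  const uint32_t masks[]  = {1u, 0xFFu, 0x80000000u}; // example c1 constants
  const unsigned shifts[] = {0u, 3u, 31u};            // example c2 shift amounts
  for (uint32_t x : values)
    for (uint32_t c1 : masks)
      for (unsigned c2 : shifts)
        // (shl (and x, c1), c2) == (and x, (shl c1, c2)) for x in {0, ~0}.
        assert(((x & c1) << c2) == (x & (c1 << c2)));
  return 0;
}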
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 7b4ab62..64bc70c 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -118,6 +118,10 @@ namespace llvm {
/// operand produced by a CMP instruction.
SETCC,
+ // Same as SETCC except it's materialized with an sbb and the value is all
+ // ones or all zeros.
+ SETCC_CARRY,
+
/// X86 conditional moves. Operand 0 and operand 1 are the two values
/// to select from. Operand 2 is the condition code, and operand 3 is the
/// flag operand produced by a CMP or TEST instruction. It also writes a
@@ -626,7 +630,9 @@ namespace llvm {
std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
bool isSigned);
-
+
+ SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+ SelectionDAG &DAG);
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG);
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG);
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index b5fa862..b6a2c05 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1333,6 +1333,15 @@ def CMOVNO64rm : RI<0x41, MRMSrcMem, // if !overflow, GR64 = [mem64]
X86_COND_NO, EFLAGS))]>, TB;
} // isTwoAddress
+// Use sbb to materialize carry flag into a GPR.
+let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in
+def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins),
+ "sbb{q}\t$dst, $dst",
+ [(set GR64:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>;
+
+def : Pat<(i64 (anyext (X86setcc_c X86_COND_B, EFLAGS))),
+ (SETB_C64r)>;
+
//===----------------------------------------------------------------------===//
// Conversion Instructions...
//
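SETB_C64r (and the 8/16/32-bit variants added to X86InstrInfo.td below) rely on the classic idiom that sbb of a register with itself computes reg - reg - CF, which is all ones when the carry flag is set and zero otherwise. A small sketch of that arithmetic in plain C++ (materializeCarry is a hypothetical helper, not the instruction definition):

#include <cassert>
#include <cstdint>

// "sbb %eax, %eax" computes EAX = EAX - EAX - CF = -CF, so the register ends up
// all ones when the carry flag is set and all zeros when it is clear.
uint32_t materializeCarry(bool carryFlag) {
  return 0u - static_cast<uint32_t>(carryFlag);
}

int main() {
  assert(materializeCarry(true)  == 0xFFFFFFFFu);
  assert(materializeCarry(false) == 0u);
  return 0;
}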
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index a37013d..1947d35 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -783,12 +783,14 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
return Reg;
// Check for post-frame index elimination operations
- return hasLoadFromStackSlot(MI, FrameIndex);
+ const MachineMemOperand *Dummy;
+ return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}
return 0;
}
bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
int &FrameIndex) const {
for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
oe = MI->memoperands_end();
@@ -798,6 +800,7 @@ bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
if (const FixedStackPseudoSourceValue *Value =
dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
FrameIndex = Value->getFrameIndex();
+ MMO = *o;
return true;
}
}
@@ -819,12 +822,14 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
return Reg;
// Check for post-frame index elimination operations
- return hasStoreToStackSlot(MI, FrameIndex);
+ const MachineMemOperand *Dummy;
+ return hasStoreToStackSlot(MI, Dummy, FrameIndex);
}
return 0;
}
bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
int &FrameIndex) const {
for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
oe = MI->memoperands_end();
@@ -834,6 +839,7 @@ bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
if (const FixedStackPseudoSourceValue *Value =
dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
FrameIndex = Value->getFrameIndex();
+ MMO = *o;
return true;
}
}
@@ -1052,6 +1058,112 @@ static bool hasLiveCondCodeDef(MachineInstr *MI) {
return false;
}
+/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when
+/// 16-bit LEA is disabled. It uses a 32-bit LEA to form 3-address code by
+/// promoting to a 32-bit superregister and then truncating back down to a
+/// 16-bit subregister.
+MachineInstr *
+X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
+ MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ MachineInstr *MI = MBBI;
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isKill = MI->getOperand(1).isKill();
+
+ unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::LEA64_32r : X86::LEA32r;
+ MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
+ unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+
+ // Build and insert into an implicit UNDEF value. This is OK because
+ // we'll be shifting and then extracting the lower 16 bits.
+ // This has the potential to cause a partial register stall, e.g.
+ // movw (%rbp,%rcx,2), %dx
+ // leal -65(%rdx), %esi
+ // But testing has shown this *does* help performance in 64-bit mode (at
+ // least on modern x86 machines).
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
+ MachineInstr *InsMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
+ .addReg(leaInReg)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(X86::SUBREG_16BIT);
+
+ MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
+ get(Opc), leaOutReg);
+ switch (MIOpc) {
+ default:
+ llvm_unreachable(0);
+ break;
+ case X86::SHL16ri: {
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ MIB.addReg(0).addImm(1 << ShAmt)
+ .addReg(leaInReg, RegState::Kill).addImm(0);
+ break;
+ }
+ case X86::INC16r:
+ case X86::INC64_16r:
+ addLeaRegOffset(MIB, leaInReg, true, 1);
+ break;
+ case X86::DEC16r:
+ case X86::DEC64_16r:
+ addLeaRegOffset(MIB, leaInReg, true, -1);
+ break;
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
+ break;
+ case X86::ADD16rr: {
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+ unsigned leaInReg2 = 0;
+ MachineInstr *InsMI2 = 0;
+ if (Src == Src2) {
+ // ADD16rr %reg1028<kill>, %reg1028
+ // just a single insert_subreg.
+ addRegReg(MIB, leaInReg, true, leaInReg, false);
+ } else {
+ leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ // Build and insert into an implicit UNDEF value. This is OK because
+ // we'll be shifting and then extracting the lower 16 bits.
+ BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+ InsMI2 =
+ BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2)
+ .addReg(leaInReg2)
+ .addReg(Src2, getKillRegState(isKill2))
+ .addImm(X86::SUBREG_16BIT);
+ addRegReg(MIB, leaInReg, true, leaInReg2, true);
+ }
+ if (LV && isKill2 && InsMI2)
+ LV->replaceKillInstruction(Src2, MI, InsMI2);
+ break;
+ }
+ }
+
+ MachineInstr *NewMI = MIB;
+ MachineInstr *ExtMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(leaOutReg, RegState::Kill)
+ .addImm(X86::SUBREG_16BIT);
+
+ if (LV) {
+ // Update live variables
+ LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
+ LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
+ if (isKill)
+ LV->replaceKillInstruction(Src, MI, InsMI);
+ if (isDead)
+ LV->replaceKillInstruction(Dest, MI, ExtMI);
+ }
+
+ return ExtMI;
+}
+
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
/// may be able to convert a two-address instruction into a true
@@ -1077,7 +1189,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineInstr *NewMI = NULL;
// FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
// we have better subtarget support, enable the 16-bit LEA generation here.
+ // 16-bit LEA is also slow on Core2.
bool DisableLEA16 = true;
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
unsigned MIOpc = MI->getOpcode();
switch (MIOpc) {
@@ -1116,8 +1230,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned ShAmt = MI->getOperand(2).getImm();
if (ShAmt == 0 || ShAmt >= 4) return 0;
- unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() ?
- X86::LEA64_32r : X86::LEA32r;
+ unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
.addReg(0).addImm(1 << ShAmt)
@@ -1131,51 +1244,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned ShAmt = MI->getOperand(2).getImm();
if (ShAmt == 0 || ShAmt >= 4) return 0;
- if (DisableLEA16) {
- // If 16-bit LEA is disabled, use 32-bit LEA via subregisters.
- MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
- unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
- ? X86::LEA64_32r : X86::LEA32r;
- unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
- unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
-
- // Build and insert into an implicit UNDEF value. This is OK because
- // well be shifting and then extracting the lower 16-bits.
- BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
- MachineInstr *InsMI =
- BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
- .addReg(leaInReg)
- .addReg(Src, getKillRegState(isKill))
- .addImm(X86::SUBREG_16BIT);
-
- NewMI = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg)
- .addReg(0).addImm(1 << ShAmt)
- .addReg(leaInReg, RegState::Kill)
- .addImm(0);
-
- MachineInstr *ExtMI =
- BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
- .addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(leaOutReg, RegState::Kill)
- .addImm(X86::SUBREG_16BIT);
-
- if (LV) {
- // Update live variables
- LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
- LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
- if (isKill)
- LV->replaceKillInstruction(Src, MI, InsMI);
- if (isDead)
- LV->replaceKillInstruction(Dest, MI, ExtMI);
- }
- return ExtMI;
- } else {
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(0).addImm(1 << ShAmt)
- .addReg(Src, getKillRegState(isKill))
- .addImm(0);
- }
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(0);
break;
}
default: {
@@ -1185,7 +1260,6 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (hasLiveCondCodeDef(MI))
return 0;
- bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
switch (MIOpc) {
default: return 0;
case X86::INC64r:
@@ -1202,7 +1276,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
}
case X86::INC16r:
case X86::INC64_16r:
- if (DisableLEA16) return 0;
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
.addReg(Dest, RegState::Define |
@@ -1223,7 +1298,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
}
case X86::DEC16r:
case X86::DEC64_16r:
- if (DisableLEA16) return 0;
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
.addReg(Dest, RegState::Define |
@@ -1246,7 +1322,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
break;
}
case X86::ADD16rr: {
- if (DisableLEA16) return 0;
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Src2 = MI->getOperand(2).getReg();
bool isKill2 = MI->getOperand(2).isKill();
@@ -1261,56 +1338,32 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD64ri32:
case X86::ADD64ri8:
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- if (MI->getOperand(2).isImm())
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, MI->getOperand(2).getImm());
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
break;
case X86::ADD32ri:
- case X86::ADD32ri8:
+ case X86::ADD32ri8: {
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- if (MI->getOperand(2).isImm()) {
- unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
+ unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
Src, isKill, MI->getOperand(2).getImm());
- }
break;
+ }
case X86::ADD16ri:
case X86::ADD16ri8:
- if (DisableLEA16) return 0;
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- if (MI->getOperand(2).isImm())
- NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, MI->getOperand(2).getImm());
- break;
- case X86::SHL16ri:
- if (DisableLEA16) return 0;
- case X86::SHL32ri:
- case X86::SHL64ri: {
- assert(MI->getNumOperands() >= 3 && MI->getOperand(2).isImm() &&
- "Unknown shl instruction!");
- unsigned ShAmt = MI->getOperand(2).getImm();
- if (ShAmt == 1 || ShAmt == 2 || ShAmt == 3) {
- X86AddressMode AM;
- AM.Scale = 1 << ShAmt;
- AM.IndexReg = Src;
- unsigned Opc = MIOpc == X86::SHL64ri ? X86::LEA64r
- : (MIOpc == X86::SHL32ri
- ? (is64Bit ? X86::LEA64_32r : X86::LEA32r) : X86::LEA16r);
- NewMI = addFullAddress(BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)), AM);
- if (isKill)
- NewMI->getOperand(3).setIsKill(true);
- }
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
break;
}
- }
}
}
@@ -1571,14 +1624,17 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I = MBB.end();
while (I != MBB.begin()) {
--I;
- // Working from the bottom, when we see a non-terminator
- // instruction, we're done.
+
+ // Working from the bottom, when we see a non-terminator instruction, we're
+ // done.
if (!isBrAnalysisUnpredicatedTerminator(I, *this))
break;
- // A terminator that isn't a branch can't easily be handled
- // by this analysis.
+
+ // A terminator that isn't a branch can't easily be handled by this
+ // analysis.
if (!I->getDesc().isBranch())
return true;
+
// Handle unconditional branches.
if (I->getOpcode() == X86::JMP) {
if (!AllowModify) {
@@ -1587,10 +1643,12 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
}
// If the block has any instructions after a JMP, delete them.
- while (next(I) != MBB.end())
- next(I)->eraseFromParent();
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+
Cond.clear();
FBB = 0;
+
// Delete the JMP if it's equivalent to a fall-through.
if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
TBB = 0;
@@ -1598,14 +1656,17 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
I = MBB.end();
continue;
}
+
// TBB is used to indicate the unconditional destination.
TBB = I->getOperand(0).getMBB();
continue;
}
+
// Handle conditional branches.
X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
if (BranchCode == X86::COND_INVALID)
return true; // Can't handle indirect branch.
+
// Working from the bottom, handle the first conditional branch.
if (Cond.empty()) {
FBB = TBB;
@@ -1613,24 +1674,26 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
Cond.push_back(MachineOperand::CreateImm(BranchCode));
continue;
}
- // Handle subsequent conditional branches. Only handle the case
- // where all conditional branches branch to the same destination
- // and their condition opcodes fit one of the special
- // multi-branch idioms.
+
+ // Handle subsequent conditional branches. Only handle the case where all
+ // conditional branches branch to the same destination and their condition
+ // opcodes fit one of the special multi-branch idioms.
assert(Cond.size() == 1);
assert(TBB);
- // Only handle the case where all conditional branches branch to
- // the same destination.
+
+ // Only handle the case where all conditional branches branch to the same
+ // destination.
if (TBB != I->getOperand(0).getMBB())
return true;
- X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
+
// If the conditions are the same, we can leave them alone.
+ X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
if (OldBranchCode == BranchCode)
continue;
- // If they differ, see if they fit one of the known patterns.
- // Theoretically we could handle more patterns here, but
- // we shouldn't expect to see them if instruction selection
- // has done a reasonable job.
+
+ // If they differ, see if they fit one of the known patterns. Theoretically,
+ // we could handle more patterns here, but we shouldn't expect to see them
+ // if instruction selection has done a reasonable job.
if ((OldBranchCode == X86::COND_NP &&
BranchCode == X86::COND_E) ||
(OldBranchCode == X86::COND_E &&
@@ -1643,6 +1706,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
BranchCode = X86::COND_NE_OR_P;
else
return true;
+
// Update the MachineOperand.
Cond[0].setImm(BranchCode);
}
@@ -2713,27 +2777,6 @@ unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
return I->second.first;
}
-bool X86InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case X86::TCRETURNri:
- case X86::TCRETURNdi:
- case X86::RET: // Return.
- case X86::RETI:
- case X86::TAILJMPd:
- case X86::TAILJMPr:
- case X86::TAILJMPm:
- case X86::JMP: // Uncond branch.
- case X86::JMP32r: // Indirect branch.
- case X86::JMP64r: // Indirect branch (64-bit).
- case X86::JMP32m: // Indirect branch through mem.
- case X86::JMP64m: // Indirect branch through mem (64-bit).
- return true;
- default: return false;
- }
-}
-
bool X86InstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 1 && "Invalid X86 branch condition!");
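convertToThreeAddressWithLEA above is sound because the low 16 bits of the 32-bit LEA result match what the original 16-bit instruction would have produced, and, for the flag-setting INC/DEC/ADD forms, the conversion is guarded by the hasLiveCondCodeDef check. A minimal check of that wrap-around property, using a hypothetical add of 5:

#include <cassert>
#include <cstdint>

int main() {
  // Promote a 16-bit value into a 32-bit "register", do the LEA-style add
  // there, then truncate back down: the low 16 bits agree with a direct
  // 16-bit add, including on wrap-around.
  uint16_t src    = 0xFFFE;
  uint32_t leaIn  = src;                            // insert_subreg
  uint32_t leaOut = leaIn + 5;                      // leal 5(%eax), %ecx
  uint16_t dest   = static_cast<uint16_t>(leaOut);  // extract_subreg
  assert(dest == static_cast<uint16_t>(src + 5));   // same as addw $5, %ax
  return 0;
}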
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index c6daa25..b83441d 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -457,11 +457,14 @@ public:
/// hasLoadFromStackSlot - If the specified machine instruction has
/// a load from a stack slot, return true along with the FrameIndex
- /// of the loaded stack slot. If not, return false. Unlike
+ /// of the loaded stack slot and the machine mem operand containing
+ /// the reference. If not, return false. Unlike
/// isLoadFromStackSlot, this returns true for any instruction that
/// loads from the stack. This is a hint only and may not catch all
/// cases.
- bool hasLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ bool hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
/// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
@@ -472,11 +475,13 @@ public:
/// hasStoreToStackSlot - If the specified machine instruction has a
/// store to a stack slot, return true along with the FrameIndex of
- /// the loaded stack slot. If not, return false. Unlike
- /// isStoreToStackSlot, this returns true for any instructions that
- /// loads from the stack. This is a hint only and may not catch all
- /// cases.
- bool hasStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ /// the stored stack slot and the machine mem operand containing the
+ /// reference. If not, return false. Unlike isStoreToStackSlot,
+ /// this returns true for any instruction that stores to the
+ /// stack. This is a hint only and may not catch all cases.
+ bool hasStoreToStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
AliasAnalysis *AA) const;
@@ -595,7 +600,6 @@ public:
bool UnfoldLoad, bool UnfoldStore,
unsigned *LoadRegIndex = 0) const;
- virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
@@ -633,6 +637,11 @@ public:
unsigned getGlobalBaseReg(MachineFunction *MF) const;
private:
+ MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
+ MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
unsigned OpNum,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 90ef1f4..3cc1853 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -87,6 +87,7 @@ def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>;
def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
[SDNPHasChain]>;
def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
+def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC>;
def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
[SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
@@ -816,7 +817,7 @@ def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
let neverHasSideEffects = 1 in
def LEA16r : I<0x8D, MRMSrcMem,
- (outs GR16:$dst), (ins i32mem:$src),
+ (outs GR16:$dst), (ins lea32mem:$src),
"lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
let isReMaterializable = 1 in
def LEA32r : I<0x8D, MRMSrcMem,
@@ -3059,6 +3060,21 @@ let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags
let Uses = [EFLAGS] in {
+// Use sbb to materialize carry bit.
+
+let Defs = [EFLAGS], isCodeGenOnly = 1 in {
+def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins),
+ "sbb{b}\t$dst, $dst",
+ [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins),
+ "sbb{w}\t$dst, $dst",
+ [(set GR16:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>,
+ OpSize;
+def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins),
+ "sbb{l}\t$dst, $dst",
+ [(set GR32:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>;
+} // isCodeGenOnly
+
def SETEr : I<0x94, MRM0r,
(outs GR8 :$dst), (ins),
"sete\t$dst",
@@ -4169,6 +4185,12 @@ def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1),
GR16:$src2, (i8 imm:$amt2)), addr:$dst),
(SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
+// (anyext (setcc_carry)) -> (zext (setcc_carry))
+def : Pat<(i16 (anyext (X86setcc_c X86_COND_B, EFLAGS))),
+ (SETB_C16r)>;
+def : Pat<(i32 (anyext (X86setcc_c X86_COND_B, EFLAGS))),
+ (SETB_C32r)>;
+
//===----------------------------------------------------------------------===//
// EFLAGS-defining Patterns
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index dfdd4ce..62841f8 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2083,7 +2083,7 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
"pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v4i32 (pshufd:$src2
- (bc_v4i32(memopv2i64 addr:$src1)),
+ (bc_v4i32 (memopv2i64 addr:$src1)),
(undef))))]>;
}
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 33852bd..d96aafd 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -423,21 +423,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
- unsigned MaxAlign = 0;
-
- for (int i = FFI->getObjectIndexBegin(),
- e = FFI->getObjectIndexEnd(); i != e; ++i) {
- if (FFI->isDeadObjectIndex(i))
- continue;
-
- unsigned Align = FFI->getObjectAlignment(i);
- MaxAlign = std::max(MaxAlign, Align);
- }
-
- return MaxAlign;
-}
-
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
@@ -638,10 +623,7 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Calculate and set max stack object alignment early, so we can decide
// whether we will need stack realignment (and thus FP).
- unsigned MaxAlign = std::max(MFI->getMaxAlignment(),
- calculateMaxStackAlignment(MFI));
-
- MFI->setMaxAlignment(MaxAlign);
+ MFI->calculateMaxStackAlignment();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
@@ -741,7 +723,7 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB,
if (MBBI == MBB.end()) return;
- MachineBasicBlock::iterator NI = next(MBBI);
+ MachineBasicBlock::iterator NI = llvm::next(MBBI);
if (NI == MBB.end()) return;
unsigned Opc = NI->getOpcode();
@@ -775,7 +757,7 @@ static int mergeSPUpdates(MachineBasicBlock &MBB,
return 0;
MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
- MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : next(MBBI);
+ MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
unsigned Opc = PI->getOpcode();
int Offset = 0;
@@ -1001,7 +983,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
}
// Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
I != E; ++I)
I->addLiveIn(FramePtr);
@@ -1482,45 +1464,3 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
}
#include "X86GenRegisterInfo.inc"
-
-namespace {
- struct MSAC : public MachineFunctionPass {
- static char ID;
- MSAC() : MachineFunctionPass(&ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF) {
- MachineFrameInfo *FFI = MF.getFrameInfo();
- MachineRegisterInfo &RI = MF.getRegInfo();
-
- // Calculate max stack alignment of all already allocated stack objects.
- unsigned MaxAlign = calculateMaxStackAlignment(FFI);
-
- // Be over-conservative: scan over all vreg defs and find, whether vector
- // registers are used. If yes - there is probability, that vector register
- // will be spilled and thus stack needs to be aligned properly.
- for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
- RegNum < RI.getLastVirtReg(); ++RegNum)
- MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
-
- if (FFI->getMaxAlignment() == MaxAlign)
- return false;
-
- FFI->setMaxAlignment(MaxAlign);
- return true;
- }
-
- virtual const char *getPassName() const {
- return "X86 Maximal Stack Alignment Calculator";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-
- char MSAC::ID = 0;
-}
-
-FunctionPass*
-llvm::createX86MaxStackAlignmentCalculatorPass() { return new MSAC(); }
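The deleted calculateMaxStackAlignment helper and the MSAC pass are superseded by MachineFrameInfo::calculateMaxStackAlignment() and the generic createMaxStackAlignmentCalculatorPass() used in X86TargetMachine.cpp below; conceptually the computation is still just a maximum over the alignments of the frame's live objects. A toy sketch of that computation (maxStackAlignment is a hypothetical stand-in, not the MachineFrameInfo implementation):

#include <algorithm>
#include <cassert>
#include <vector>

// Toy model: the frame's required alignment is the maximum of the current
// value and the alignment of every live stack object.
unsigned maxStackAlignment(const std::vector<unsigned> &objectAligns,
                           unsigned current) {
  unsigned maxAlign = current;
  for (std::vector<unsigned>::const_iterator i = objectAligns.begin(),
                                             e = objectAligns.end();
       i != e; ++i)
    maxAlign = std::max(maxAlign, *i);
  return maxAlign;
}

int main() {
  std::vector<unsigned> aligns;
  aligns.push_back(4);
  aligns.push_back(16);
  aligns.push_back(8);
  assert(maxStackAlignment(aligns, 4) == 16);
  return 0;
}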
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 661f560..75cdbad 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -367,5 +367,5 @@ bool X86Subtarget::enablePostRAScheduler(
RegClassVector& CriticalPathRCs) const {
Mode = TargetSubtarget::ANTIDEP_CRITICAL;
CriticalPathRCs.clear();
- return OptLevel >= CodeGenOpt::Default;
+ return OptLevel >= CodeGenOpt::Aggressive;
}
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 0cda8bc..0152121 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -163,7 +163,7 @@ bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
// Calculate and set max stack object alignment early, so we can decide
// whether we will need stack realignment (and thus FP).
- PM.add(createX86MaxStackAlignmentCalculatorPass());
+ PM.add(createMaxStackAlignmentCalculatorPass());
return false; // -print-machineinstr shouldn't print after this.
}
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index e616fe6..5a54844 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -453,26 +453,6 @@ bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
-/// BlockHasNoFallThrough - Analyse if MachineBasicBlock does not
-/// fall-through into its successor block.
-bool XCoreInstrInfo::
-BlockHasNoFallThrough(const MachineBasicBlock &MBB) const
-{
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case XCore::RETSP_u6: // Return.
- case XCore::RETSP_lu6:
- case XCore::BAU_1r: // Indirect branch.
- case XCore::BRFU_u6: // Uncond branch.
- case XCore::BRFU_lu6:
- case XCore::BRBU_u6:
- case XCore::BRBU_lu6:
- return true;
- default: return false;
- }
-}
-
/// ReverseBranchCondition - Return the inverse opcode of the
/// specified Branch instruction.
bool XCoreInstrInfo::
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 24230ac..3e0a765 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -87,8 +87,6 @@ public:
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI) const;
- virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
-
virtual bool ReverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const;
};