author     rdivacky <rdivacky@FreeBSD.org>    2009-12-15 18:09:07 +0000
committer  rdivacky <rdivacky@FreeBSD.org>    2009-12-15 18:09:07 +0000
commit     40a6fcdb85efd93fe0e36c9552cfb0b18b5eacd6 (patch)
tree       076117cdf3579003f07cad4cdf0593347ce58150 /lib/Target
parent     e7908924d847e63b02bc82bfaa1709ab9c774dcd (diff)
download   FreeBSD-src-40a6fcdb85efd93fe0e36c9552cfb0b18b5eacd6.zip
           FreeBSD-src-40a6fcdb85efd93fe0e36c9552cfb0b18b5eacd6.tar.gz
Update LLVM to 91430.
Diffstat (limited to 'lib/Target')
65 files changed, 1761 insertions, 676 deletions
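The bulk of the ARM changes in this import wire up the __sync_* atomic builtins: ISD::MEMBARRIER is now custom-lowered (DMB/DSB on ARMv7, an MCR on ARMv6), and the new ATOMIC_* pseudo-instructions are expanded into ldrex/strex retry loops by the EmitAtomicBinary/EmitAtomicCmpSwap code further down. A minimal sketch of the source-level operations this enables, assuming an ARMv6+ target; the function names here are illustrative, the __sync builtins are the standard GCC/Clang ones:

    #include <stdint.h>

    // Lowered via the new ATOMIC_LOAD_ADD_I32 pseudo into an ldrex/strex loop.
    uint32_t fetch_add(volatile uint32_t *p, uint32_t v) {
      return __sync_fetch_and_add(p, v);
    }

    // Lowered via ATOMIC_CMP_SWAP_I32 (the loop1MBB/loop2MBB pattern below).
    uint32_t cas(volatile uint32_t *p, uint32_t oldv, uint32_t newv) {
      return __sync_val_compare_and_swap(p, oldv, newv);
    }

    // Lowered via ISD::MEMBARRIER: "dmb" on ARMv7, "mcr p15, ..." on ARMv6.
    void full_barrier() { __sync_synchronize(); }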
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index ff1980d..21445ad 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -109,7 +109,6 @@ FunctionPass *createNEONPreAllocPass(); FunctionPass *createNEONMoveFixPass(); FunctionPass *createThumb2ITBlockPass(); FunctionPass *createThumb2SizeReductionPass(); -FunctionPass *createARMMaxStackAlignmentCalculatorPass(); extern Target TheARMTarget, TheThumbTarget; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index c95d4c8..1aae369 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -418,11 +418,13 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { return true; } -/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing +/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing. +DISABLE_INLINE static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, - unsigned JTI) DISABLE_INLINE; + unsigned JTI); static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, unsigned JTI) { + assert(JTI < JT.size()); return JT[JTI].MBBs.size(); } @@ -467,6 +469,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { return MI->getOperand(2).getImm(); case ARM::Int_eh_sjlj_setjmp: return 24; + case ARM::tInt_eh_sjlj_setjmp: + return 22; case ARM::t2Int_eh_sjlj_setjmp: return 22; case ARM::BR_JTr: @@ -755,7 +759,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, assert((RC == ARM::QPRRegisterClass || RC == ARM::QPR_VFP2RegisterClass || RC == ARM::QPR_8RegisterClass) && "Unknown regclass!"); - // FIXME: Neon instructions should support predicates if (Align >= 16 && (getRegisterInfo().needsStackRealignment(MF))) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 282e30c..78d9135 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -92,6 +92,8 @@ namespace ARMII { StMiscFrm = 9 << FormShift, LdStMulFrm = 10 << FormShift, + LdStExFrm = 28 << FormShift, + // Miscellaneous arithmetic instructions ArithMiscFrm = 11 << FormShift, @@ -190,9 +192,6 @@ public: // if there is not such an opcode. virtual unsigned getUnindexedOpcode(unsigned Opc) const =0; - // Return true if the block does not fall through. - virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const =0; - virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 653328d..9b5f79f 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -471,21 +471,6 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, } } -static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) { - unsigned MaxAlign = 0; - - for (int i = FFI->getObjectIndexBegin(), - e = FFI->getObjectIndexEnd(); i != e; ++i) { - if (FFI->isDeadObjectIndex(i)) - continue; - - unsigned Align = FFI->getObjectAlignment(i); - MaxAlign = std::max(MaxAlign, Align); - } - - return MaxAlign; -} - /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. 
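The getNumJTEntries hunk above moves the DISABLE_INLINE marker from the tail of the forward declaration onto its own line in front of it, and adds a bounds assert. A self-contained sketch of the pattern, assuming the usual era definition of the macro from llvm/Support/Compiler.h; the MachineJumpTableEntry stand-in is reduced for illustration:

    #include <cassert>
    #include <vector>

    #if defined(__GNUC__)
    #define DISABLE_INLINE __attribute__((noinline))
    #elif defined(_MSC_VER)
    #define DISABLE_INLINE __declspec(noinline)
    #else
    #define DISABLE_INLINE
    #endif

    struct MachineJumpTableEntry { std::vector<int> MBBs; }; // reduced stand-in

    // Kept out of line so the known gcc -fstrict-aliasing miscompile cannot
    // bite at an inlined call site.
    DISABLE_INLINE
    static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                    unsigned JTI) {
      assert(JTI < JT.size());
      return JT[JTI].MBBs.size();
    }

    int main() {
      std::vector<MachineJumpTableEntry> JT(1);
      return getNumJTEntries(JT, 0); // 0
    }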
@@ -585,16 +570,21 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, SmallVector<unsigned, 4> UnspilledCS2GPRs; ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - MachineFrameInfo *MFI = MF.getFrameInfo(); // Calculate and set max stack object alignment early, so we can decide // whether we will need stack realignment (and thus FP). if (RealignStack) { - unsigned MaxAlign = std::max(MFI->getMaxAlignment(), - calculateMaxStackAlignment(MFI)); - MFI->setMaxAlignment(MaxAlign); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->calculateMaxStackAlignment(); } + // Spill R4 if Thumb2 function requires stack realignment - it will be used as + // scratch register. + // FIXME: It will be better just to find spare register here. + if (needsStackRealignment(MF) && + AFI->isThumb2Function()) + MF.getRegInfo().setPhysRegUsed(ARM::R4); + // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. const unsigned *CSRegs = getCalleeSavedRegs(); @@ -1368,14 +1358,30 @@ emitPrologue(MachineFunction &MF) const { // If we need dynamic stack realignment, do it here. if (needsStackRealignment(MF)) { - unsigned Opc; unsigned MaxAlign = MFI->getMaxAlignment(); assert (!AFI->isThumb1OnlyFunction()); - Opc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri; - - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), ARM::SP) + if (!AFI->isThumbFunction()) { + // Emit bic sp, sp, MaxAlign + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, + TII.get(ARM::BICri), ARM::SP) .addReg(ARM::SP, RegState::Kill) .addImm(MaxAlign-1))); + } else { + // We cannot use sp as source/dest register here, thus we're emitting the + // following sequence: + // mov r4, sp + // bic r4, r4, MaxAlign + // mov sp, r4 + // FIXME: It will be better just to find spare register here. + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R4) + .addReg(ARM::SP, RegState::Kill); + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, + TII.get(ARM::t2BICri), ARM::R4) + .addReg(ARM::R4, RegState::Kill) + .addImm(MaxAlign-1))); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) + .addReg(ARM::R4, RegState::Kill); + } } } @@ -1479,48 +1485,4 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); } -namespace { - struct MaximalStackAlignmentCalculator : public MachineFunctionPass { - static char ID; - MaximalStackAlignmentCalculator() : MachineFunctionPass(&ID) {} - - virtual bool runOnMachineFunction(MachineFunction &MF) { - MachineFrameInfo *FFI = MF.getFrameInfo(); - MachineRegisterInfo &RI = MF.getRegInfo(); - - // Calculate max stack alignment of all already allocated stack objects. - unsigned MaxAlign = calculateMaxStackAlignment(FFI); - - // Be over-conservative: scan over all vreg defs and find, whether vector - // registers are used. If yes - there is probability, that vector register - // will be spilled and thus stack needs to be aligned properly. 
- for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister; - RegNum < RI.getLastVirtReg(); ++RegNum) - MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment()); - - if (FFI->getMaxAlignment() == MaxAlign) - return false; - - FFI->setMaxAlignment(MaxAlign); - return true; - } - - virtual const char *getPassName() const { - return "ARM Stack Required Alignment Auto-Detector"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } - }; - - char MaximalStackAlignmentCalculator::ID = 0; -} - -FunctionPass* -llvm::createARMMaxStackAlignmentCalculatorPass() { - return new MaximalStackAlignmentCalculator(); -} - #include "ARMGenRegisterInfo.inc" diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index e59a315..acd30d2 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -418,10 +418,10 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF, static bool BBHasFallthrough(MachineBasicBlock *MBB) { // Get the next machine basic block in the function. MachineFunction::iterator MBBI = MBB; - if (next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function. + if (llvm::next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function. return false; - MachineBasicBlock *NextBB = next(MBBI); + MachineBasicBlock *NextBB = llvm::next(MBBI); for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) if (*I == NextBB) @@ -760,7 +760,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { CompareMBBNumbers); MachineBasicBlock* WaterBB = *IP; if (WaterBB == OrigBB) - WaterList.insert(next(IP), NewBB); + WaterList.insert(llvm::next(IP), NewBB); else WaterList.insert(IP, OrigBB); NewWaterList.insert(OrigBB); @@ -887,7 +887,7 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) { void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta) { - MachineFunction::iterator MBBI = BB; MBBI = next(MBBI); + MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI); for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs(); i < e; ++i) { BBOffsets[i] += delta; @@ -929,7 +929,7 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB, if (delta==0) return; } - MBBI = next(MBBI); + MBBI = llvm::next(MBBI); } } @@ -1096,7 +1096,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, DEBUG(errs() << "Split at end of block\n"); if (&UserMBB->back() == UserMI) assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!"); - NewMBB = next(MachineFunction::iterator(UserMBB)); + NewMBB = llvm::next(MachineFunction::iterator(UserMBB)); // Add an unconditional branch from UserMBB to fallthrough block. 
// Record it for branch lengthening; this new branch will not get out of // range, but if the preceding conditional branch is out of range, the @@ -1144,7 +1144,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); Offset < BaseInsertOffset; Offset += TII->GetInstSizeInBytes(MI), - MI = next(MI)) { + MI = llvm::next(MI)) { if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) { CPUser &U = CPUsers[CPUIndex]; if (!OffsetIsInRange(Offset, EndInsertOffset, @@ -1204,7 +1204,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, NewWaterList.insert(NewIsland); } // The new CPE goes before the following block (NewMBB). - NewMBB = next(MachineFunction::iterator(WaterBB)); + NewMBB = llvm::next(MachineFunction::iterator(WaterBB)); } else { // No water found. @@ -1406,7 +1406,7 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { NumCBrFixed++; if (BMI != MI) { - if (next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && + if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && BMI->getOpcode() == Br.UncondBr) { // Last MI in the BB is an unconditional branch. Can we simply invert the // condition and swap destinations: @@ -1433,12 +1433,12 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { // branch to the destination. int delta = TII->GetInstSizeInBytes(&MBB->back()); BBSizes[MBB->getNumber()] -= delta; - MachineBasicBlock* SplitBB = next(MachineFunction::iterator(MBB)); + MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB)); AdjustBBOffsetsAfter(SplitBB, -delta); MBB->back().eraseFromParent(); // BBOffsets[SplitBB] is wrong temporarily, fixed below } - MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB)); + MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber() << " also invert condition and change dest. 
to BB#" diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index c929c54..1b8727d 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -48,7 +48,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); while (MBBI != E) { MachineInstr &MI = *MBBI; - MachineBasicBlock::iterator NMBBI = next(MBBI); + MachineBasicBlock::iterator NMBBI = llvm::next(MBBI); unsigned Opcode = MI.getOpcode(); switch (Opcode) { diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index c839fc6..655c762 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include <sstream> using namespace llvm; @@ -377,7 +378,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); else setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); @@ -500,6 +501,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; + case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER"; + case ARMISD::SYNCBARRIER: return "ARMISD::SYNCBARRIER"; + case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCGE: return "ARMISD::VCGE"; case ARMISD::VCGEU: return "ARMISD::VCGEU"; @@ -1470,6 +1474,28 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { } } +static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + DebugLoc dl = Op.getDebugLoc(); + SDValue Op5 = Op.getOperand(5); + SDValue Res; + unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue(); + if (isDeviceBarrier) { + if (Subtarget->hasV7Ops()) + Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0)); + else + Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(0, MVT::i32)); + } else { + if (Subtarget->hasV7Ops()) + Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); + else + Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(0, MVT::i32)); + } + return Res; +} + static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, unsigned VarArgsFrameIndex) { // vastart just stores the address of the VarArgsFrameIndex slot into the @@ -2528,6 +2554,25 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, return true; } +/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of +/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". +/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. +static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, + unsigned &WhichResult) { + unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + if (EltSz == 64) + return false; + + unsigned NumElts = VT.getVectorNumElements(); + WhichResult = (M[0] == 0 ? 
0 : 1); + for (unsigned i = 0; i < NumElts; i += 2) { + if ((unsigned) M[i] != i + WhichResult || + (unsigned) M[i+1] != i + WhichResult) + return false; + } + return true; +} + static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); @@ -2548,6 +2593,33 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, return true; } +/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of +/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". +/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, +static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, + unsigned &WhichResult) { + unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + if (EltSz == 64) + return false; + + unsigned Half = VT.getVectorNumElements() / 2; + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned j = 0; j != 2; ++j) { + unsigned Idx = WhichResult; + for (unsigned i = 0; i != Half; ++i) { + if ((unsigned) M[i + j * Half] != Idx) + return false; + Idx += 2; + } + } + + // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. + if (VT.is64BitVector() && EltSz == 32) + return false; + + return true; +} + static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); @@ -2571,6 +2643,33 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, return true; } +/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of +/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". +/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. +static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, + unsigned &WhichResult) { + unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + if (EltSz == 64) + return false; + + unsigned NumElts = VT.getVectorNumElements(); + WhichResult = (M[0] == 0 ? 0 : 1); + unsigned Idx = WhichResult * NumElts / 2; + for (unsigned i = 0; i != NumElts; i += 2) { + if ((unsigned) M[i] != Idx || + (unsigned) M[i+1] != Idx) + return false; + Idx += 1; + } + + // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. + if (VT.is64BitVector() && EltSz == 32) + return false; + + return true; +} + + static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) { // Canonicalize all-zeros and all-ones vectors. 
ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode()); @@ -2683,7 +2782,10 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, isVEXTMask(M, VT, ReverseVEXT, Imm) || isVTRNMask(M, VT, WhichResult) || isVUZPMask(M, VT, WhichResult) || - isVZIPMask(M, VT, WhichResult)); + isVZIPMask(M, VT, WhichResult) || + isVTRN_v_undef_Mask(M, VT, WhichResult) || + isVUZP_v_undef_Mask(M, VT, WhichResult) || + isVZIP_v_undef_Mask(M, VT, WhichResult)); } /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit @@ -2815,6 +2917,16 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), V1, V2).getValue(WhichResult); + if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), + V1, V1).getValue(WhichResult); + if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), + V1, V1).getValue(WhichResult); + if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), + V1, V1).getValue(WhichResult); + // If the shuffle is not directly supported and it has 4 elements, use // the PerfectShuffle-generated table to synthesize it from other shuffles. if (VT.getVectorNumElements() == 4 && @@ -2886,6 +2998,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); + case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: @@ -2938,14 +3051,233 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, //===----------------------------------------------------------------------===// MachineBasicBlock * +ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size) const { + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptr = MI->getOperand(1).getReg(); + unsigned oldval = MI->getOperand(2).getReg(); + unsigned newval = MI->getOperand(3).getReg(); + unsigned scratch = BB->getParent()->getRegInfo() + .createVirtualRegister(ARM::GPRRegisterClass); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + bool isThumb2 = Subtarget->isThumb2(); + + unsigned ldrOpc, strOpc; + switch (Size) { + default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); + case 1: + ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; + strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB; + break; + case 2: + ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; + strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; + break; + case 4: + ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; + strOpc = isThumb2 ? 
ARM::t2STREX : ARM::STREX; + break; + } + + MachineFunction *MF = BB->getParent(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; // insert the new blocks after the current block + + MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loop1MBB); + MF->insert(It, loop2MBB); + MF->insert(It, exitMBB); + exitMBB->transferSuccessors(BB); + + // thisMBB: + // ... + // fallthrough --> loop1MBB + BB->addSuccessor(loop1MBB); + + // loop1MBB: + // ldrex dest, [ptr] + // cmp dest, oldval + // bne exitMBB + BB = loop1MBB; + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); + AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) + .addReg(dest).addReg(oldval)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + BB->addSuccessor(loop2MBB); + BB->addSuccessor(exitMBB); + + // loop2MBB: + // strex scratch, newval, [ptr] + // cmp scratch, #0 + // bne loop1MBB + BB = loop2MBB; + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval) + .addReg(ptr)); + AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(scratch).addImm(0)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + BB->addSuccessor(loop1MBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + return BB; +} + +MachineBasicBlock * +ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, + unsigned Size, unsigned BinOpcode) const { + // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction *F = BB->getParent(); + MachineFunction::iterator It = BB; + ++It; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptr = MI->getOperand(1).getReg(); + unsigned incr = MI->getOperand(2).getReg(); + DebugLoc dl = MI->getDebugLoc(); + bool isThumb2 = Subtarget->isThumb2(); + unsigned ldrOpc, strOpc; + switch (Size) { + default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); + case 1: + ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; + strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB; + break; + case 2: + ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; + strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; + break; + case 4: + ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; + strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; + break; + } + + MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, loopMBB); + F->insert(It, exitMBB); + exitMBB->transferSuccessors(BB); + + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); + unsigned scratch2 = (!BinOpcode) ? incr : + RegInfo.createVirtualRegister(ARM::GPRRegisterClass); + + // thisMBB: + // ... 
+ // fallthrough --> loopMBB + BB->addSuccessor(loopMBB); + + // loopMBB: + // ldrex dest, ptr + // <binop> scratch2, dest, incr + // strex scratch, scratch2, ptr + // cmp scratch, #0 + // bne- loopMBB + // fallthrough --> exitMBB + BB = loopMBB; + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); + if (BinOpcode) { + // operand order needs to go the other way for NAND + if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) + AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). + addReg(incr).addReg(dest)).addReg(0); + else + AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). + addReg(dest).addReg(incr)).addReg(0); + } + + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2) + .addReg(ptr)); + AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(scratch).addImm(0)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + return BB; +} + +MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); + bool isThumb2 = Subtarget->isThumb2(); switch (MI->getOpcode()) { default: + MI->dump(); llvm_unreachable("Unexpected instr type to insert"); + + case ARM::ATOMIC_LOAD_ADD_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); + case ARM::ATOMIC_LOAD_ADD_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); + case ARM::ATOMIC_LOAD_ADD_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); + + case ARM::ATOMIC_LOAD_AND_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); + case ARM::ATOMIC_LOAD_AND_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); + case ARM::ATOMIC_LOAD_AND_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); + + case ARM::ATOMIC_LOAD_OR_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); + case ARM::ATOMIC_LOAD_OR_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); + case ARM::ATOMIC_LOAD_OR_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); + + case ARM::ATOMIC_LOAD_XOR_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); + case ARM::ATOMIC_LOAD_XOR_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); + case ARM::ATOMIC_LOAD_XOR_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); + + case ARM::ATOMIC_LOAD_NAND_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); + case ARM::ATOMIC_LOAD_NAND_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); + case ARM::ATOMIC_LOAD_NAND_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); + + case ARM::ATOMIC_LOAD_SUB_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); + case ARM::ATOMIC_LOAD_SUB_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); + case ARM::ATOMIC_LOAD_SUB_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? 
ARM::t2SUBrr : ARM::SUBrr); + + case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); + case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); + case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); + + case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); + case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); + case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); + case ARM::tMOVCCr_pseudo: { // To "insert" a SELECT_CC instruction, we actually have to insert the // diamond control-flow pattern. The incoming instruction knows the @@ -3935,6 +4267,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return std::make_pair(0U, ARM::SPRRegisterClass); if (VT == MVT::f64) return std::make_pair(0U, ARM::DPRRegisterClass); + if (VT.getSizeInBits() == 128) + return std::make_pair(0U, ARM::QPRRegisterClass); break; } } @@ -3973,6 +4307,9 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, ARM::D4, ARM::D5, ARM::D6, ARM::D7, ARM::D8, ARM::D9, ARM::D10,ARM::D11, ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0); + if (VT.getSizeInBits() == 128) + return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3, + ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0); break; } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 4f31f8a..e1b3348 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -72,6 +72,9 @@ namespace llvm { DYN_ALLOC, // Dynamic allocation on the stack. + MEMBARRIER, // Memory barrier + SYNCBARRIER, // Memory sync barrier + VCEQ, // Vector compare equal. VCGE, // Vector compare greater than or equal. VCGEU, // Vector compare unsigned greater than or equal. @@ -328,6 +331,15 @@ namespace llvm { SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl); + + MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size) const; + MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size, + unsigned BinOpcode) const; + }; } diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index e76e93c..cf0edff 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -33,6 +33,8 @@ def LdMiscFrm : Format<8>; def StMiscFrm : Format<9>; def LdStMulFrm : Format<10>; +def LdStExFrm : Format<28>; + def ArithMiscFrm : Format<11>; def ExtFrm : Format<12>; @@ -199,6 +201,19 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, let Pattern = pattern; list<Predicate> Predicates = [IsARM]; } +// A few are not predicable +class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz, + IndexMode im, Format f, InstrItinClass itin, + string opc, string asm, string cstr, + list<dag> pattern> + : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = !strconcat(opc, asm); + let Pattern = pattern; + let isPredicable = 0; + list<Predicate> Predicates = [IsARM]; +} // Same as I except it can optionally modify CPSR. Note it's modeled as // an input operand since by default it's a zero register. 
It will @@ -239,6 +254,10 @@ class AXI<dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin, asm, "", pattern>; +class AInoP<dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : InoP<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin, + opc, asm, "", pattern>; // Ctrl flow instructions class ABI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin, @@ -264,6 +283,28 @@ class JTI<dag oops, dag iops, InstrItinClass itin, : XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, itin, asm, "", pattern>; + +// Atomic load/store instructions + +class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin, + opc, asm, "", pattern> { + let Inst{27-23} = 0b00011; + let Inst{22-21} = opcod; + let Inst{20} = 1; + let Inst{11-0} = 0b111110011111; +} +class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin, + opc, asm, "", pattern> { + let Inst{27-23} = 0b00011; + let Inst{22-21} = opcod; + let Inst{20} = 0; + let Inst{11-4} = 0b11111001; +} + // addrmode1 instructions class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -967,6 +1008,17 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, list<Predicate> Predicates = [IsThumb2]; } +class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, + InstrItinClass itin, + string asm, string cstr, list<dag> pattern> + : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = asm; + let Pattern = pattern; + list<Predicate> Predicates = [IsThumb1Only]; +} + class T2I<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : Thumb2I<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "", pattern>; diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 87bb12b..85f6b40 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -60,25 +60,6 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } -bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case ARM::BX_RET: // Return. - case ARM::LDM_RET: - case ARM::B: - case ARM::BRIND: - case ARM::BR_JTr: // Jumptable branch. - case ARM::BR_JTm: // Jumptable branch through mem. - case ARM::BR_JTadd: // Jumptable branch add to pc. - return true; - default: - break; - } - - return false; -} - void ARMInstrInfo:: reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index 4319577..d4199d1 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -32,9 +32,6 @@ public: // if there is not such an opcode. unsigned getUnindexedOpcode(unsigned Opc) const; - // Return true if the block does not fall through. 
- bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; - void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 7516d3c..e14696a 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -46,6 +46,11 @@ def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>; +def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>; +def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>; +def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; + // Node definitions. def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; @@ -93,6 +98,15 @@ def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInFlag ]>; def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>; +def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7, + [SDNPHasChain]>; +def ARMSyncBarrierV7 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV7, + [SDNPHasChain]>; +def ARMMemBarrierV6 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV6, + [SDNPHasChain]>; +def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6, + [SDNPHasChain]>; + //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. // @@ -772,6 +786,7 @@ let isBranch = 1, isTerminator = 1 in { def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id), IIC_Br, "mov\tpc, $target \n$jt", [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> { + let Inst{11-4} = 0b00000000; let Inst{15-12} = 0b1111; let Inst{20} = 0; // S Bit let Inst{24-21} = 0b1101; @@ -1561,6 +1576,189 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst), let Inst{25} = 1; } +//===----------------------------------------------------------------------===// +// Atomic operations intrinsics +// + +// memory barriers protect the atomic sequences +let hasSideEffects = 1 in { +def Int_MemBarrierV7 : AInoP<(outs), (ins), + Pseudo, NoItinerary, + "dmb", "", + [(ARMMemBarrierV7)]>, + Requires<[IsARM, HasV7]> { + let Inst{31-4} = 0xf57ff05; + // FIXME: add support for options other than a full system DMB + let Inst{3-0} = 0b1111; +} + +def Int_SyncBarrierV7 : AInoP<(outs), (ins), + Pseudo, NoItinerary, + "dsb", "", + [(ARMSyncBarrierV7)]>, + Requires<[IsARM, HasV7]> { + let Inst{31-4} = 0xf57ff04; + // FIXME: add support for options other than a full system DSB + let Inst{3-0} = 0b1111; +} + +def Int_MemBarrierV6 : AInoP<(outs), (ins GPR:$zero), + Pseudo, NoItinerary, + "mcr", "\tp15, 0, $zero, c7, c10, 5", + [(ARMMemBarrierV6 GPR:$zero)]>, + Requires<[IsARM, HasV6]> { + // FIXME: add support for options other than a full system DMB + // FIXME: add encoding +} + +def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero), + Pseudo, NoItinerary, + "mcr", "\tp15, 0, $zero, c7, c10, 4", + [(ARMSyncBarrierV6 GPR:$zero)]>, + Requires<[IsARM, HasV6]> { + // FIXME: add support for options other than a full system DSB + // FIXME: add encoding +} +} + +let usesCustomInserter = 1 in { + let Uses = [CPSR] in { + def ATOMIC_LOAD_ADD_I8 : PseudoInst< + (outs GPR:$dst), (ins 
GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_ADD_I8 PSEUDO!", + [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_SUB_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_SUB_I8 PSEUDO!", + [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_AND_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_AND_I8 PSEUDO!", + [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_OR_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_OR_I8 PSEUDO!", + [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_XOR_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_XOR_I8 PSEUDO!", + [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_NAND_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_NAND_I8 PSEUDO!", + [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_ADD_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_ADD_I16 PSEUDO!", + [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_SUB_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_SUB_I16 PSEUDO!", + [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_AND_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_AND_I16 PSEUDO!", + [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_OR_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_OR_I16 PSEUDO!", + [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_XOR_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_XOR_I16 PSEUDO!", + [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_NAND_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_NAND_I16 PSEUDO!", + [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_ADD_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_ADD_I32 PSEUDO!", + [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_SUB_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_SUB_I32 PSEUDO!", + [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_AND_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_AND_I32 PSEUDO!", + [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_OR_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_OR_I32 PSEUDO!", + [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_XOR_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_XOR_I32 PSEUDO!", + [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_NAND_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} 
ATOMIC_LOAD_NAND_I32 PSEUDO!", + [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>; + + def ATOMIC_SWAP_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, + "${:comment} ATOMIC_SWAP_I8 PSEUDO!", + [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>; + def ATOMIC_SWAP_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, + "${:comment} ATOMIC_SWAP_I16 PSEUDO!", + [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>; + def ATOMIC_SWAP_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, + "${:comment} ATOMIC_SWAP_I32 PSEUDO!", + [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>; + + def ATOMIC_CMP_SWAP_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, + "${:comment} ATOMIC_CMP_SWAP_I8 PSEUDO!", + [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>; + def ATOMIC_CMP_SWAP_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, + "${:comment} ATOMIC_CMP_SWAP_I16 PSEUDO!", + [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>; + def ATOMIC_CMP_SWAP_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, + "${:comment} ATOMIC_CMP_SWAP_I32 PSEUDO!", + [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>; +} +} + +let mayLoad = 1 in { +def LDREXB : AIldrex<0b10, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary, + "ldrexb", "\t$dest, [$ptr]", + []>; +def LDREXH : AIldrex<0b11, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary, + "ldrexh", "\t$dest, [$ptr]", + []>; +def LDREX : AIldrex<0b00, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary, + "ldrex", "\t$dest, [$ptr]", + []>; +def LDREXD : AIldrex<0b01, (outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr), + NoItinerary, + "ldrexd", "\t$dest, $dest2, [$ptr]", + []>; +} + +let mayStore = 1 in { +def STREXB : AIstrex<0b10, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), + NoItinerary, + "strexb", "\t$success, $src, [$ptr]", + []>; +def STREXH : AIstrex<0b11, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), + NoItinerary, + "strexh", "\t$success, $src, [$ptr]", + []>; +def STREX : AIstrex<0b00, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), + NoItinerary, + "strex", "\t$success, $src, [$ptr]", + []>; +def STREXD : AIstrex<0b01, (outs GPR:$success), + (ins GPR:$src, GPR:$src2, GPR:$ptr), + NoItinerary, + "strexd", "\t$success, $src, $src2, [$ptr]", + []>; +} //===----------------------------------------------------------------------===// // TLS Instructions diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 3166931..61b7705 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -152,7 +152,7 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), let Inst{24} = 0; // P bit let Inst{23} = 1; // U bit let Inst{20} = 1; - let Inst{11-9} = 0b101; + let Inst{11-8} = 0b1011; } // Use vstmia to store a Q register as a D register pair. 
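The LDREX/STREX instruction definitions above are what EmitAtomicCmpSwap stitches into its loop1MBB/loop2MBB blocks. A hand-written GNU C++ inline-assembly equivalent of that compare-and-swap loop, offered as a hedged sketch (ARMv6+ only, not the compiler's literal output):

    #include <stdint.h>

    // Mirrors loop1MBB/loop2MBB from EmitAtomicCmpSwap: load-exclusive,
    // compare, conditionally store-exclusive, retry if the reservation fails.
    uint32_t cas32(volatile uint32_t *ptr, uint32_t oldval, uint32_t newval) {
      uint32_t dest, success;
      asm volatile(
          "1: ldrex   %0, [%2]      \n"  // dest = *ptr (exclusive)
          "   cmp     %0, %3        \n"  // dest == oldval ?
          "   bne     2f            \n"  //   no: give up, return current value
          "   strex   %1, %4, [%2]  \n"  // try *ptr = newval; success = 0 if ok
          "   cmp     %1, #0        \n"
          "   bne     1b            \n"  //   store lost exclusivity: retry
          "2:"
          : "=&r"(dest), "=&r"(success)
          : "r"(ptr), "r"(oldval), "r"(newval)
          : "cc", "memory");
      return dest;
    }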
@@ -164,7 +164,7 @@ def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), let Inst{24} = 0; // P bit let Inst{23} = 1; // U bit let Inst{20} = 0; - let Inst{11-9} = 0b101; + let Inst{11-8} = 0b1011; } // VLD1 : Vector Load (multiple single elements) diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index b5956a3..9306bdb 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -669,6 +669,35 @@ let isCall = 1, [(set R0, ARMthread_pointer)]>; } +// SJLJ Exception handling intrinsics +// eh_sjlj_setjmp() is an instruction sequence to store the return +// address and save #0 in R0 for the non-longjmp case. +// Since by its nature we may be coming from some other function to get +// here, and we're using the stack frame for the containing function to +// save/restore registers, we can't keep anything live in regs across +// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon +// when we get here from a longjmp(). We force everthing out of registers +// except for our own input by listing the relevant registers in Defs. By +// doing so, we also cause the prologue/epilogue code to actively preserve +// all of the callee-saved resgisters, which is exactly what we want. +let Defs = + [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ] in { + def tInt_eh_sjlj_setjmp : ThumbXI<(outs), (ins GPR:$src), + AddrModeNone, SizeSpecial, NoItinerary, + "mov\tr12, r1\t@ begin eh.setjmp\n" + "\tmov\tr1, sp\n" + "\tstr\tr1, [$src, #8]\n" + "\tadr\tr1, 0f\n" + "\tadds\tr1, #1\n" + "\tstr\tr1, [$src, #4]\n" + "\tmov\tr1, r12\n" + "\tmovs\tr0, #0\n" + "\tb\t1f\n" + ".align 2\n" + "0:\tmovs\tr0, #1\t@ end eh.setjmp\n" + "1:", "", + [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>; +} //===----------------------------------------------------------------------===// // Non-Instruction Patterns // diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 9489815..949ce73 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1065,6 +1065,68 @@ def t2MOVCCror : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs), RegConstraint<"$false = $dst">; //===----------------------------------------------------------------------===// +// Atomic operations intrinsics +// + +// memory barriers protect the atomic sequences +let hasSideEffects = 1 in { +def t2Int_MemBarrierV7 : AInoP<(outs), (ins), + Pseudo, NoItinerary, + "dmb", "", + [(ARMMemBarrierV7)]>, + Requires<[IsThumb2]> { + // FIXME: add support for options other than a full system DMB +} + +def t2Int_SyncBarrierV7 : AInoP<(outs), (ins), + Pseudo, NoItinerary, + "dsb", "", + [(ARMSyncBarrierV7)]>, + Requires<[IsThumb2]> { + // FIXME: add support for options other than a full system DSB +} +} + +let mayLoad = 1 in { +def t2LDREXB : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, + Size4Bytes, NoItinerary, + "ldrexb", "\t$dest, [$ptr]", "", + []>; +def t2LDREXH : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, + Size4Bytes, NoItinerary, + "ldrexh", "\t$dest, [$ptr]", "", + []>; +def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, + Size4Bytes, NoItinerary, + "ldrex", "\t$dest, [$ptr]", "", + []>; +def t2LDREXD : Thumb2I<(outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr), + AddrModeNone, Size4Bytes, NoItinerary, + "ldrexd", "\t$dest, $dest2, [$ptr]", "", + []>; +} + +let mayStore = 1 in { +def t2STREXB : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr), + AddrModeNone, Size4Bytes, NoItinerary, + "strexb", 
"\t$success, $src, [$ptr]", "", + []>; +def t2STREXH : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr), + AddrModeNone, Size4Bytes, NoItinerary, + "strexh", "\t$success, $src, [$ptr]", "", + []>; +def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr), + AddrModeNone, Size4Bytes, NoItinerary, + "strex", "\t$success, $src, [$ptr]", "", + []>; +def t2STREXD : Thumb2I<(outs GPR:$success), + (ins GPR:$src, GPR:$src2, GPR:$ptr), + AddrModeNone, Size4Bytes, NoItinerary, + "strexd", "\t$success, $src, $src2, [$ptr]", "", + []>; +} + +//===----------------------------------------------------------------------===// // TLS Instructions // diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 304d0ef..22bd80e 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -449,7 +449,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, } if (MBBI != MBB.end()) { - MachineBasicBlock::iterator NextMBBI = next(MBBI); + MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true)); @@ -494,7 +494,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, } if (MBBI != MBB.end()) { - MachineBasicBlock::iterator NextMBBI = next(MBBI); + MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); if (Mode == ARM_AM::ia && isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset)); @@ -604,7 +604,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, } if (!DoMerge && MBBI != MBB.end()) { - MachineBasicBlock::iterator NextMBBI = next(MBBI); + MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); if (!isAM5 && isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) { DoMerge = true; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 2564ed9..1c6fca7 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -95,7 +95,7 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, // Calculate and set max stack object alignment early, so we can decide // whether we will need stack realignment (and thus FP). - PM.add(createARMMaxStackAlignmentCalculatorPass()); + PM.add(createMaxStackAlignmentCalculatorPass()); // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 692bb19..362bbf1 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -1045,6 +1045,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, printNoHashImmediate(MI, OpNum); return false; case 'P': // Print a VFP double precision register. + case 'q': // Print a NEON quad precision register. 
printOperand(MI, OpNum); return false; case 'Q': diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index 50abcf4..3c0414d 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -51,7 +51,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); MachineBasicBlock::iterator NextMII; for (; MII != E; MII = NextMII) { - NextMII = next(MII); + NextMII = llvm::next(MII); MachineInstr *MI = &*MII; if (MI->getOpcode() == ARM::VMOVD && diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index 206677b..d9942c8 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -338,7 +338,7 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride)) continue; - MachineBasicBlock::iterator NextI = next(MBBI); + MachineBasicBlock::iterator NextI = llvm::next(MBBI); for (unsigned R = 0; R < NumRegs; ++R) { MachineOperand &MO = MI->getOperand(FirstOpnd + R); assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 7602b6d..66d3b83 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -32,25 +32,6 @@ unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } -bool -Thumb1InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case ARM::tBX_RET: - case ARM::tBX_RET_vararg: - case ARM::tPOP_RET: - case ARM::tB: - case ARM::tBRIND: - case ARM::tBR_JTr: - return true; - default: - break; - } - - return false; -} - bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index b28229d..516ddf1 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -31,9 +31,6 @@ public: // if there is not such an opcode. unsigned getUnindexedOpcode(unsigned Opc) const; - // Return true if the block does not fall through. - bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). 
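The mechanical next(...) to llvm::next(...) churn running through these files deserves a note: once C++0x's std::next is visible, an unqualified call on a std:: iterator finds both overloads (argument-dependent lookup pulls in std::next alongside llvm::next) and becomes ambiguous, hence the qualification. A reduced reproduction, with llvm::next simplified from llvm/ADT/STLExtras.h:

    #include <iterator>
    #include <list>

    namespace llvm {
      // Simplified stand-in for the helper in llvm/ADT/STLExtras.h.
      template <typename ItTy> ItTy next(ItTy it) { return ++it; }
    }
    using namespace llvm;

    int main() {
      std::list<int> l;
      l.push_back(1); l.push_back(2);
      // next(l.begin());  // C++0x: ambiguous between llvm::next and std::next
      std::list<int>::iterator it = llvm::next(l.begin()); // qualified: OK
      return *it; // 2
    }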
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 37adf37..9f3816a 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -528,7 +528,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i+1).ChangeToImmediate(Mask); } Offset = (Offset - Mask * Scale); - MachineBasicBlock::iterator NII = next(II); + MachineBasicBlock::iterator NII = llvm::next(II); emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII, *this, dl); } else { diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 16c1e6f..f4a8c27 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -36,30 +36,6 @@ unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const { } bool -Thumb2InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case ARM::t2LDM_RET: - case ARM::t2B: // Uncond branch. - case ARM::t2BR_JT: // Jumptable branch. - case ARM::t2TBB: // Table branch byte. - case ARM::t2TBH: // Table branch halfword. - case ARM::tBR_JTr: // Jumptable branch (16-bit version). - case ARM::tBX_RET: - case ARM::tBX_RET_vararg: - case ARM::tPOP_RET: - case ARM::tB: - case ARM::tBRIND: - return true; - default: - break; - } - - return false; -} - -bool Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 663a60b..a0f89a6 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -31,9 +31,6 @@ public: // if there is not such an opcode. unsigned getUnindexedOpcode(unsigned Opc) const; - // Return true if the block does not fall through. - bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; - bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index b2fd7b3..35359aa 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -649,7 +649,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); MachineBasicBlock::iterator NextMII; for (; MII != E; MII = NextMII) { - NextMII = next(MII); + NextMII = llvm::next(MII); MachineInstr *MI = &*MII; LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR); diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp index 86173ff..39f0749 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.cpp +++ b/lib/Target/Alpha/AlphaInstrInfo.cpp @@ -392,18 +392,6 @@ void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB, .addReg(Alpha::R31); } -bool AlphaInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case Alpha::RETDAG: // Return. - case Alpha::RETDAGp: - case Alpha::BR: // Uncond branch. - case Alpha::JMP: // Indirect branch. 
- return true; - default: return false; - } -} bool AlphaInstrInfo:: ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { assert(Cond.size() == 2 && "Invalid Alpha branch opcode!"); diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h index 274f452..c3b6044 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.h +++ b/lib/Target/Alpha/AlphaInstrInfo.h @@ -78,7 +78,6 @@ public: unsigned RemoveBranch(MachineBasicBlock &MBB) const; void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; - bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; /// getGlobalBaseReg - Return a virtual register initialized with the diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 9e4fe27..a52ca79 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -2573,7 +2573,7 @@ void CWriter::visitSwitchInst(SwitchInst &SI) { BasicBlock *Succ = cast<BasicBlock>(SI.getOperand(i+1)); printPHICopiesForSuccessor (SI.getParent(), Succ, 2); printBranchToBlock(SI.getParent(), Succ, 2); - if (Function::iterator(Succ) == next(Function::iterator(SI.getParent()))) + if (Function::iterator(Succ) == llvm::next(Function::iterator(SI.getParent()))) Out << " break;\n"; } Out << " }\n"; @@ -2593,7 +2593,7 @@ bool CWriter::isGotoCodeNecessary(BasicBlock *From, BasicBlock *To) { /// FIXME: This should be reenabled, but loop reordering safe!! return true; - if (next(Function::iterator(From)) != Function::iterator(To)) + if (llvm::next(Function::iterator(From)) != Function::iterator(To)) return true; // Not the direct successor, we need a goto. //isa<SwitchInst>(From->getTerminator()) diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index ecce8e3..2306665 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -580,10 +580,6 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, } } -bool -SPUInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { - return (!MBB.empty() && isUncondBranch(&MBB.back())); -} //! Reverses a branch's condition, returning false on success. bool SPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h index c644a11..42677fc 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.h +++ b/lib/Target/CellSPU/SPUInstrInfo.h @@ -79,9 +79,6 @@ namespace llvm { bool canFoldMemoryOperand(const MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops) const; - //! Return true if the specified block does not fall through - virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; - //! Reverses a branch's condition, returning false on success. 
virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 4bae6c7..a872fbd 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -976,21 +976,20 @@ namespace { nl(Out); printType(GV->getType()); if (GV->hasInitializer()) { - Constant* Init = GV->getInitializer(); + Constant *Init = GV->getInitializer(); printType(Init->getType()); - if (Function* F = dyn_cast<Function>(Init)) { + if (Function *F = dyn_cast<Function>(Init)) { nl(Out)<< "/ Function Declarations"; nl(Out); printFunctionHead(F); } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) { nl(Out) << "// Global Variable Declarations"; nl(Out); printVariableHead(gv); - } else { - nl(Out) << "// Constant Definitions"; nl(Out); - printConstant(gv); - } - if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) { + nl(Out) << "// Global Variable Definitions"; nl(Out); printVariableBody(gv); + } else { + nl(Out) << "// Constant Definitions"; nl(Out); + printConstant(Init); } } } diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index beccb2c..4edf422 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -86,10 +86,10 @@ namespace { void dump() { errs() << "MSP430ISelAddressMode " << this << '\n'; - if (Base.Reg.getNode() != 0) { + if (BaseType == RegBase && Base.Reg.getNode() != 0) { errs() << "Base.Reg "; Base.Reg.getNode()->dump(); - } else { + } else if (BaseType == FrameIndexBase) { errs() << " Base.FrameIndex " << Base.FrameIndex << '\n'; } errs() << " Disp " << Disp << '\n'; diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 29cc370..5fe9b20 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -15,6 +15,7 @@ #include "MSP430ISelLowering.h" #include "MSP430.h" +#include "MSP430MachineFunctionInfo.h" #include "MSP430TargetMachine.h" #include "MSP430Subtarget.h" #include "llvm/DerivedTypes.h" @@ -32,16 +33,38 @@ #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/VectorExtras.h" using namespace llvm; +typedef enum { + NoHWMult, + HWMultIntr, + HWMultNoIntr +} HWMultUseMode; + +static cl::opt<HWMultUseMode> +HWMultMode("msp430-hwmult-mode", + cl::desc("Hardware multiplier use mode"), + cl::init(HWMultNoIntr), + cl::values( + clEnumValN(NoHWMult, "no", + "Do not use hardware multiplier"), + clEnumValN(HWMultIntr, "interrupts", + "Assume hardware multiplier can be used inside interrupts"), + clEnumValN(HWMultNoIntr, "use", + "Assume hardware multiplier cannot be used inside interrupts"), + clEnumValEnd)); + MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : TargetLowering(tm, new TargetLoweringObjectFileELF()), Subtarget(*tm.getSubtargetImpl()), TM(tm) { + TD = getTargetData(); + // Set up the register classes. 
addRegisterClass(MVT::i8, MSP430::GR8RegisterClass); addRegisterClass(MVT::i16, MSP430::GR16RegisterClass); @@ -92,8 +115,8 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setOperationAction(ISD::BR_CC, MVT::i8, Custom); setOperationAction(ISD::BR_CC, MVT::i16, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Expand); - setOperationAction(ISD::SETCC, MVT::i8, Expand); - setOperationAction(ISD::SETCC, MVT::i16, Expand); + setOperationAction(ISD::SETCC, MVT::i8, Custom); + setOperationAction(ISD::SETCC, MVT::i16, Custom); setOperationAction(ISD::SELECT, MVT::i8, Expand); setOperationAction(ISD::SELECT, MVT::i16, Expand); setOperationAction(ISD::SELECT_CC, MVT::i8, Custom); @@ -142,6 +165,15 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setOperationAction(ISD::SDIV, MVT::i16, Expand); setOperationAction(ISD::SDIVREM, MVT::i16, Expand); setOperationAction(ISD::SREM, MVT::i16, Expand); + + // Libcalls names. + if (HWMultMode == HWMultIntr) { + setLibcallName(RTLIB::MUL_I8, "__mulqi3hw"); + setLibcallName(RTLIB::MUL_I16, "__mulhi3hw"); + } else if (HWMultMode == HWMultNoIntr) { + setLibcallName(RTLIB::MUL_I8, "__mulqi3hw_noint"); + setLibcallName(RTLIB::MUL_I16, "__mulhi3hw_noint"); + } } SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { @@ -151,9 +183,12 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::SRA: return LowerShifts(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); + case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); + case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); + case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); default: llvm_unreachable("unimplemented operand"); return SDValue(); @@ -225,6 +260,13 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain, case CallingConv::C: case CallingConv::Fast: return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); + case CallingConv::MSP430_INTR: + if (Ins.empty()) + return Chain; + else { + llvm_report_error("ISRs cannot have arguments"); + return SDValue(); + } } } @@ -244,6 +286,9 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee, case CallingConv::C: return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall, Outs, Ins, dl, DAG, InVals); + case CallingConv::MSP430_INTR: + llvm_report_error("ISRs cannot be called directly"); + return SDValue(); } } @@ -340,6 +385,12 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, // CCValAssign - represent the assignment of the return value to a location SmallVector<CCValAssign, 16> RVLocs; + // ISRs cannot return any value. + if (CallConv == CallingConv::MSP430_INTR && !Outs.empty()) { + llvm_report_error("ISRs cannot return any value"); + return SDValue(); + } + // CCState - Info about the registers and stack slot. CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, *DAG.getContext()); @@ -370,11 +421,14 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, Flag = Chain.getValue(1); } + unsigned Opc = (CallConv == CallingConv::MSP430_INTR ? 
+ MSP430ISD::RETI_FLAG : MSP430ISD::RET_FLAG); + if (Flag.getNode()) - return DAG.getNode(MSP430ISD::RET_FLAG, dl, MVT::Other, Chain, Flag); + return DAG.getNode(Opc, dl, MVT::Other, Chain, Flag); // Return Void - return DAG.getNode(MSP430ISD::RET_FLAG, dl, MVT::Other, Chain); + return DAG.getNode(Opc, dl, MVT::Other, Chain); } /// LowerCCCCallTo - function arguments are copied from virtual regs to @@ -538,9 +592,21 @@ SDValue MSP430TargetLowering::LowerShifts(SDValue Op, EVT VT = Op.getValueType(); DebugLoc dl = N->getDebugLoc(); - // We currently only lower shifts of constant argument. + // Expand non-constant shifts to loops: if (!isa<ConstantSDNode>(N->getOperand(1))) - return SDValue(); + switch (Opc) { + default: + assert(0 && "Invalid shift opcode!"); + case ISD::SHL: + return DAG.getNode(MSP430ISD::SHL, dl, + VT, N->getOperand(0), N->getOperand(1)); + case ISD::SRA: + return DAG.getNode(MSP430ISD::SRA, dl, + VT, N->getOperand(0), N->getOperand(1)); + case ISD::SRL: + return DAG.getNode(MSP430ISD::SRL, dl, + VT, N->getOperand(0), N->getOperand(1)); + } uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); @@ -648,6 +714,88 @@ SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { Chain, Dest, TargetCC, Flag); } + +SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + + // If we are doing an AND and testing against zero, then the CMP + // will not be generated. The AND (or BIT) will generate the condition codes, + // but they are different from CMP. + bool andCC = false; + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { + if (RHSC->isNullValue() && LHS.hasOneUse() && + (LHS.getOpcode() == ISD::AND || + (LHS.getOpcode() == ISD::TRUNCATE && + LHS.getOperand(0).getOpcode() == ISD::AND))) { + andCC = true; + } + } + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + SDValue TargetCC; + SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG); + + // Get the condition codes directly from the status register, if it's easy. + // Otherwise a branch will be generated. Note that the AND and BIT + // instructions generate different flags than CMP; the carry bit can be used + // for NE/EQ. + bool Invert = false; + bool Shift = false; + bool Convert = true; + switch (cast<ConstantSDNode>(TargetCC)->getZExtValue()) { + default: + Convert = false; + break; + case MSP430CC::COND_HS: + // Res = SRW & 1, no processing is required + break; + case MSP430CC::COND_LO: + // Res = ~(SRW & 1) + Invert = true; + break; + case MSP430CC::COND_NE: + if (andCC) { + // C = ~Z, thus Res = SRW & 1, no processing is required + } else { + // Res = (SRW >> 1) & 1 + Shift = true; + } + break; + case MSP430CC::COND_E: + if (andCC) { + // C = ~Z, thus Res = ~(SRW & 1) + } else { + // Res = ~((SRW >> 1) & 1) + Shift = true; + } + Invert = true; + break; + } + EVT VT = Op.getValueType(); + SDValue One = DAG.getConstant(1, VT); + if (Convert) { + SDValue SR = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::SRW, + MVT::i16, Flag); + if (Shift) + // FIXME: somewhere this is turned into a SRL, lower it MSP specific?
+ SR = DAG.getNode(ISD::SRA, dl, MVT::i16, SR, One); + SR = DAG.getNode(ISD::AND, dl, MVT::i16, SR, One); + if (Invert) + SR = DAG.getNode(ISD::XOR, dl, MVT::i16, SR, One); + return SR; + } else { + SDValue Zero = DAG.getConstant(0, VT); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag); + SmallVector<SDValue, 4> Ops; + Ops.push_back(One); + Ops.push_back(Zero); + Ops.push_back(TargetCC); + Ops.push_back(Flag); + return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size()); + } +} + SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -682,6 +830,55 @@ SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op, DAG.getValueType(Val.getValueType())); } +SDValue MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { + MachineFunction &MF = DAG.getMachineFunction(); + MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>(); + int ReturnAddrIndex = FuncInfo->getRAIndex(); + + if (ReturnAddrIndex == 0) { + // Set up a frame object for the return address. + uint64_t SlotSize = TD->getPointerSize(); + ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, + true, false); + FuncInfo->setRAIndex(ReturnAddrIndex); + } + + return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); +} + +SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + DebugLoc dl = Op.getDebugLoc(); + + if (Depth > 0) { + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); + SDValue Offset = + DAG.getConstant(TD->getPointerSize(), MVT::i16); + return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, getPointerTy(), + FrameAddr, Offset), + NULL, 0); + } + + // Just load the return address. + SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); + return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + RetAddrFI, NULL, 0); +} + +SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setFrameAddressIsTaken(true); + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, + MSP430::FPW, VT); + while (Depth--) + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + return FrameAddr; +} + /// getPostIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if this node can be /// combined with a load / store to form a post-indexed load / store. 
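As a rough illustration of the RETURNADDR/FRAMEADDR lowering above, the following C++ sketch models the same walk (illustrative only; readFPW() and readRASlot() are hypothetical stand-ins for the FPW register copy and the fixed return-address frame object, and it assumes each frame stores the caller's frame pointer at offset 0 with the return address one pointer-size above it):

extern void **readFPW();   // hypothetical: current value of FPW
extern void *readRASlot(); // hypothetical: the fixed return-address slot

void **frame_address(unsigned Depth) {
  void **FP = readFPW();          // CopyFromReg of MSP430::FPW
  while (Depth--)                 // while (Depth--) FrameAddr = load(FrameAddr)
    FP = (void **)*FP;
  return FP;
}

void *return_address(unsigned Depth) {
  if (Depth > 0)                  // load at FrameAddr + PointerSize
    return *(frame_address(Depth) + 1);
  return readRASlot();            // Depth 0: just load the return address
}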
@@ -722,6 +919,7 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: return NULL; case MSP430ISD::RET_FLAG: return "MSP430ISD::RET_FLAG"; + case MSP430ISD::RETI_FLAG: return "MSP430ISD::RETI_FLAG"; case MSP430ISD::RRA: return "MSP430ISD::RRA"; case MSP430ISD::RLA: return "MSP430ISD::RLA"; case MSP430ISD::RRC: return "MSP430ISD::RRC"; @@ -730,6 +928,8 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const { case MSP430ISD::BR_CC: return "MSP430ISD::BR_CC"; case MSP430ISD::CMP: return "MSP430ISD::CMP"; case MSP430ISD::SELECT_CC: return "MSP430ISD::SELECT_CC"; + case MSP430ISD::SHL: return "MSP430ISD::SHL"; + case MSP430ISD::SRA: return "MSP430ISD::SRA"; } } @@ -738,13 +938,131 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const { //===----------------------------------------------------------------------===// MachineBasicBlock* +MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, + MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineFunction *F = BB->getParent(); + MachineRegisterInfo &RI = F->getRegInfo(); + DebugLoc dl = MI->getDebugLoc(); + const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); + + unsigned Opc; + const TargetRegisterClass * RC; + switch (MI->getOpcode()) { + default: + assert(0 && "Invalid shift opcode!"); + case MSP430::Shl8: + Opc = MSP430::SHL8r1; + RC = MSP430::GR8RegisterClass; + break; + case MSP430::Shl16: + Opc = MSP430::SHL16r1; + RC = MSP430::GR16RegisterClass; + break; + case MSP430::Sra8: + Opc = MSP430::SAR8r1; + RC = MSP430::GR8RegisterClass; + break; + case MSP430::Sra16: + Opc = MSP430::SAR16r1; + RC = MSP430::GR16RegisterClass; + break; + case MSP430::Srl8: + Opc = MSP430::SAR8r1c; + RC = MSP430::GR8RegisterClass; + break; + case MSP430::Srl16: + Opc = MSP430::SAR16r1c; + RC = MSP430::GR16RegisterClass; + break; + } + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator I = BB; + ++I; + + // Create loop block + MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB); + + F->insert(I, LoopBB); + F->insert(I, RemBB); + + // Update machine-CFG edges by transferring all successors of the current + // block to the block containing instructions after shift. + RemBB->transferSuccessors(BB); + + // Inform sdisel of the edge changes. 
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + EM->insert(std::make_pair(*SI, RemBB)); + + // Add edges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB + BB->addSuccessor(LoopBB); + BB->addSuccessor(RemBB); + LoopBB->addSuccessor(RemBB); + LoopBB->addSuccessor(LoopBB); + + unsigned ShiftAmtReg = RI.createVirtualRegister(MSP430::GR8RegisterClass); + unsigned ShiftAmtReg2 = RI.createVirtualRegister(MSP430::GR8RegisterClass); + unsigned ShiftReg = RI.createVirtualRegister(RC); + unsigned ShiftReg2 = RI.createVirtualRegister(RC); + unsigned ShiftAmtSrcReg = MI->getOperand(2).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + + // BB: + // cmp 0, N + // je RemBB + BuildMI(BB, dl, TII.get(MSP430::CMP8ir)) + .addImm(0).addReg(ShiftAmtSrcReg); + BuildMI(BB, dl, TII.get(MSP430::JCC)) + .addMBB(RemBB) + .addImm(MSP430CC::COND_E); + + // LoopBB: + // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB] + // ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB] + // ShiftReg2 = shift ShiftReg + // ShiftAmt2 = ShiftAmt - 1; + BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftReg) + .addReg(SrcReg).addMBB(BB) + .addReg(ShiftReg2).addMBB(LoopBB); + BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftAmtReg) + .addReg(ShiftAmtSrcReg).addMBB(BB) + .addReg(ShiftAmtReg2).addMBB(LoopBB); + BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2) + .addReg(ShiftReg); + BuildMI(LoopBB, dl, TII.get(MSP430::SUB8ri), ShiftAmtReg2) + .addReg(ShiftAmtReg).addImm(1); + BuildMI(LoopBB, dl, TII.get(MSP430::JCC)) + .addMBB(LoopBB) + .addImm(MSP430CC::COND_NE); + + // RemBB: + // DestReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB] + BuildMI(RemBB, dl, TII.get(MSP430::PHI), DstReg) + .addReg(SrcReg).addMBB(BB) + .addReg(ShiftReg2).addMBB(LoopBB); + + return RemBB; +} + +MachineBasicBlock* MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + unsigned Opc = MI->getOpcode(); + + if (Opc == MSP430::Shl8 || Opc == MSP430::Shl16 || + Opc == MSP430::Sra8 || Opc == MSP430::Sra16 || + Opc == MSP430::Srl8 || Opc == MSP430::Srl16) + return EmitShiftInstr(MI, BB, EM); + const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); - assert((MI->getOpcode() == MSP430::Select16 || - MI->getOpcode() == MSP430::Select8) && + + assert((Opc == MSP430::Select16 || Opc == MSP430::Select8) && "Unexpected instr type to insert"); // To "insert" a SELECT instruction, we actually have to insert the diamond diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index d413ccb..4921500 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -27,6 +27,9 @@ namespace llvm { /// Return with a flag operand. Operand 0 is the chain operand. RET_FLAG, + /// Same as RET_FLAG, but used for returning from ISRs. + RETI_FLAG, + /// Y = R{R,L}A X, rotate right (left) arithmetically RRA, RLA, @@ -44,7 +47,7 @@ namespace llvm { /// CMP - Compare instruction. CMP, - /// SetCC. Operand 0 is condition code, and operand 1 is the flag + /// SetCC - Operand 0 is condition code, and operand 1 is the flag /// operand produced by a CMP instruction. SETCC, @@ -54,9 +57,12 @@ namespace llvm { /// instruction. BR_CC, - /// SELECT_CC.
Operand 0 and operand 1 are selection variable, operand 3 + /// SELECT_CC - Operand 0 and operand 1 are selection variable, operand 3 /// is condition code and operand 4 is flag operand. - SELECT_CC + SELECT_CC, + + /// SHL, SRA, SRL - Non-constant shifts. + SHL, SRA, SRL }; } @@ -81,8 +87,12 @@ namespace llvm { SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG); SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG); + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG); + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG); + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); + SDValue getReturnAddressFrameIndex(SelectionDAG &DAG); TargetLowering::ConstraintType getConstraintType(const std::string &Constraint) const; @@ -92,6 +102,9 @@ namespace llvm { MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock* EmitShiftInstr(MachineInstr *MI, + MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; private: SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee, @@ -144,6 +157,7 @@ namespace llvm { const MSP430Subtarget &Subtarget; const MSP430TargetMachine &TM; + const TargetData *TD; }; } // namespace llvm diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index b2f09c7..2ae6759 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -219,17 +219,6 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { return false; } -bool MSP430InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB)const{ - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case MSP430::RET: // Return. - case MSP430::JMP: // Uncond branch. - return true; - default: return false; - } -} - bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { const TargetInstrDesc &TID = MI->getDesc(); if (!TID.isTerminator()) return false; @@ -270,8 +259,8 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, } // If the block has any instructions after a JMP, delete them. 
- while (next(I) != MBB.end()) - next(I)->eraseFromParent(); + while (llvm::next(I) != MBB.end()) + llvm::next(I)->eraseFromParent(); Cond.clear(); FBB = 0; diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h index 35e35db..e4ceeb9 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.h +++ b/lib/Target/MSP430/MSP430InstrInfo.h @@ -61,7 +61,6 @@ public: // Branch folding goodness bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; - bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; bool isUnpredicatedTerminator(const MachineInstr *MI) const; bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index 7a26f6c..d67ba90 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -31,12 +31,15 @@ def SDT_MSP430BrCC : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i8>]>; def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; +def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisI8<2>]>; //===----------------------------------------------------------------------===// // MSP430 Specific Node Definitions. //===----------------------------------------------------------------------===// -def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInFlag]>; +def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInFlag]>; +def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInFlag]>; def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>; def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>; @@ -54,6 +57,9 @@ def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>; def MSP430cmp : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutFlag]>; def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC, [SDNPHasChain, SDNPInFlag]>; def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC, [SDNPInFlag]>; +def MSP430shl : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>; +def MSP430sra : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>; +def MSP430srl : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>; //===----------------------------------------------------------------------===// // MSP430 Operand Definitions. @@ -90,7 +96,9 @@ def addr : ComplexPattern<iPTR, 2, "SelectAddr", [], []>; // Pattern Fragments def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>; def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 ( extloadi8 node:$ptr))>; - +def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{ + return N->hasOneUse(); +}]>; //===----------------------------------------------------------------------===// // Instruction list.. 
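The MSP430shl/sra/srl nodes defined above carry non-constant shift amounts; EmitShiftInstr (earlier in this patch) expands the pseudo instructions they select to into a one-bit-per-iteration loop. Roughly, in C terms (a sketch mirroring the BB/LoopBB/RemBB comments, not code from the patch):

unsigned short shl16(unsigned short x, unsigned char n) {
  if (n != 0) {                      // BB: cmp 0, N; je RemBB
    do {
      x = (unsigned short)(x << 1);  // LoopBB: SHL16r1 on the PHI'd value
    } while (--n != 0);              // SUB8ri + JCC COND_NE back edge
  }
  return x;                          // RemBB: PHI of both paths
}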
@@ -117,6 +125,27 @@ let usesCustomInserter = 1 in { "# Select16 PSEUDO", [(set GR16:$dst, (MSP430selectcc GR16:$src1, GR16:$src2, imm:$cc))]>; + let Defs = [SRW] in { + def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt), + "# Shl8 PSEUDO", + [(set GR8:$dst, (MSP430shl GR8:$src, GR8:$cnt))]>; + def Shl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt), + "# Shl16 PSEUDO", + [(set GR16:$dst, (MSP430shl GR16:$src, GR8:$cnt))]>; + def Sra8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt), + "# Sra8 PSEUDO", + [(set GR8:$dst, (MSP430sra GR8:$src, GR8:$cnt))]>; + def Sra16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt), + "# Sra16 PSEUDO", + [(set GR16:$dst, (MSP430sra GR16:$src, GR8:$cnt))]>; + def Srl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt), + "# Srl8 PSEUDO", + [(set GR8:$dst, (MSP430srl GR8:$src, GR8:$cnt))]>; + def Srl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt), + "# Srl16 PSEUDO", + [(set GR16:$dst, (MSP430srl GR16:$src, GR8:$cnt))]>; + + } } let neverHasSideEffects = 1 in @@ -128,7 +157,8 @@ def NOP : Pseudo<(outs), (ins), "nop", []>; // FIXME: Provide proper encoding! let isReturn = 1, isTerminator = 1, isBarrier = 1 in { - def RET : Pseudo<(outs), (ins), "ret", [(MSP430retflag)]>; + def RET : Pseudo<(outs), (ins), "ret", [(MSP430retflag)]>; + def RETI : Pseudo<(outs), (ins), "reti", [(MSP430retiflag)]>; } let isBranch = 1, isTerminator = 1 in { @@ -823,6 +853,65 @@ def CMP16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2), "cmp.w\t{$src1, $src2}", [(MSP430cmp (load addr:$src1), GR16:$src2), (implicit SRW)]>; + +// BIT TESTS, just sets condition codes +// Note that the C condition is set differently than when using CMP. +let isCommutable = 1 in { +def BIT8rr : Pseudo<(outs), (ins GR8:$src1, GR8:$src2), + "bit.b\t{$src2, $src1}", + [(MSP430cmp 0, (and_su GR8:$src1, GR8:$src2)), + (implicit SRW)]>; +def BIT16rr : Pseudo<(outs), (ins GR16:$src1, GR16:$src2), + "bit.w\t{$src2, $src1}", + [(MSP430cmp 0, (and_su GR16:$src1, GR16:$src2)), + (implicit SRW)]>; +} +def BIT8ri : Pseudo<(outs), (ins GR8:$src1, i8imm:$src2), + "bit.b\t{$src2, $src1}", + [(MSP430cmp 0, (and_su GR8:$src1, imm:$src2)), + (implicit SRW)]>; +def BIT16ri : Pseudo<(outs), (ins GR16:$src1, i16imm:$src2), + "bit.w\t{$src2, $src1}", + [(MSP430cmp 0, (and_su GR16:$src1, imm:$src2)), + (implicit SRW)]>; + +def BIT8rm : Pseudo<(outs), (ins GR8:$src1, memdst:$src2), + "bit.b\t{$src2, $src1}", + [(MSP430cmp 0, (and_su GR8:$src1, (load addr:$src2))), + (implicit SRW)]>; +def BIT16rm : Pseudo<(outs), (ins GR16:$src1, memdst:$src2), + "bit.w\t{$src2, $src1}", + [(MSP430cmp 0, (and_su GR16:$src1, (load addr:$src2))), + (implicit SRW)]>; + +def BIT8mr : Pseudo<(outs), (ins memsrc:$src1, GR8:$src2), + "bit.b\t{$src2, $src1}", + [(MSP430cmp 0, (and_su (load addr:$src1), GR8:$src2)), + (implicit SRW)]>; +def BIT16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2), + "bit.w\t{$src2, $src1}", + [(MSP430cmp 0, (and_su (load addr:$src1), GR16:$src2)), + (implicit SRW)]>; + +def BIT8mi : Pseudo<(outs), (ins memsrc:$src1, i8imm:$src2), + "bit.b\t{$src2, $src1}", + [(MSP430cmp 0, (and_su (load addr:$src1), (i8 imm:$src2))), + (implicit SRW)]>; +def BIT16mi : Pseudo<(outs), (ins memsrc:$src1, i16imm:$src2), + "bit.w\t{$src2, $src1}", + [(MSP430cmp 0, (and_su (load addr:$src1), (i16 imm:$src2))), + (implicit SRW)]>; + +def BIT8mm : Pseudo<(outs), (ins memsrc:$src1, memsrc:$src2), + "bit.b\t{$src2, $src1}", + [(MSP430cmp 0, (and_su (i8 (load addr:$src1)), + (load addr:$src2))), + (implicit SRW)]>; 
+def BIT16mm : Pseudo<(outs), (ins memsrc:$src1, memsrc:$src2), + "bit.w\t{$src2, $src1}", + [(MSP430cmp 0, (and_su (i16 (load addr:$src1)), + (load addr:$src2))), + (implicit SRW)]>; } // Defs = [SRW] //===----------------------------------------------------------------------===// @@ -905,3 +994,6 @@ def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst), // peephole patterns def : Pat<(and GR16:$src, 255), (ZEXT16r GR16:$src)>; +def : Pat<(MSP430cmp 0, (trunc (and_su GR16:$src1, GR16:$src2))), + (BIT8rr (EXTRACT_SUBREG GR16:$src1, subreg_8bit), + (EXTRACT_SUBREG GR16:$src2, subreg_8bit))>; diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h index 1d26ae3..383fd2e 100644 --- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h +++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h @@ -25,14 +25,20 @@ class MSP430MachineFunctionInfo : public MachineFunctionInfo { /// stack frame in bytes. unsigned CalleeSavedFrameSize; + /// ReturnAddrIndex - FrameIndex for return slot. + int ReturnAddrIndex; + public: MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {} explicit MSP430MachineFunctionInfo(MachineFunction &MF) - : CalleeSavedFrameSize(0) {} + : CalleeSavedFrameSize(0), ReturnAddrIndex(0) {} unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; } void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; } + + int getRAIndex() const { return ReturnAddrIndex; } + void setRAIndex(int Index) { ReturnAddrIndex = Index; } }; } // End llvm namespace diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 92baad9..e85c7a2 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -17,6 +17,7 @@ #include "MSP430MachineFunctionInfo.h" #include "MSP430RegisterInfo.h" #include "MSP430TargetMachine.h" +#include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -37,17 +38,26 @@ MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm, const unsigned* MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + const Function* F = MF->getFunction(); static const unsigned CalleeSavedRegs[] = { MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W, MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W, 0 }; + static const unsigned CalleeSavedRegsIntr[] = { + MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W, + MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W, + MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W, + 0 + }; - return CalleeSavedRegs; + return (F->getCallingConv() == CallingConv::MSP430_INTR ? 
+ CalleeSavedRegsIntr : CalleeSavedRegs); } const TargetRegisterClass *const * MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { + const Function* F = MF->getFunction(); static const TargetRegisterClass * const CalleeSavedRegClasses[] = { &MSP430::GR16RegClass, &MSP430::GR16RegClass, &MSP430::GR16RegClass, &MSP430::GR16RegClass, @@ -55,8 +65,18 @@ MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &MSP430::GR16RegClass, &MSP430::GR16RegClass, 0 }; + static const TargetRegisterClass * const CalleeSavedRegClassesIntr[] = { + &MSP430::GR16RegClass, &MSP430::GR16RegClass, + &MSP430::GR16RegClass, &MSP430::GR16RegClass, + &MSP430::GR16RegClass, &MSP430::GR16RegClass, + &MSP430::GR16RegClass, &MSP430::GR16RegClass, + &MSP430::GR16RegClass, &MSP430::GR16RegClass, + &MSP430::GR16RegClass, &MSP430::GR16RegClass, + 0 + }; - return CalleeSavedRegClasses; + return (F->getCallingConv() == CallingConv::MSP430_INTR ? + CalleeSavedRegClassesIntr : CalleeSavedRegClasses); } BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const { @@ -193,10 +213,10 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // We need to materialize the offset via add instruction. unsigned DstReg = MI.getOperand(0).getReg(); if (Offset < 0) - BuildMI(MBB, next(II), dl, TII.get(MSP430::SUB16ri), DstReg) + BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::SUB16ri), DstReg) .addReg(DstReg).addImm(-Offset); else - BuildMI(MBB, next(II), dl, TII.get(MSP430::ADD16ri), DstReg) + BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::ADD16ri), DstReg) .addReg(DstReg).addImm(Offset); return 0; @@ -251,7 +271,7 @@ void MSP430RegisterInfo::emitPrologue(MachineFunction &MF) const { .addReg(MSP430::SPW); // Mark the FramePtr as live-in in every block except the entry. - for (MachineFunction::iterator I = next(MF.begin()), E = MF.end(); + for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) I->addLiveIn(MSP430::FPW); @@ -292,7 +312,8 @@ void MSP430RegisterInfo::emitEpilogue(MachineFunction &MF, DebugLoc DL = MBBI->getDebugLoc(); switch (RetOpcode) { - case MSP430::RET: break; // These are ok + case MSP430::RET: + case MSP430::RETI: break; // These are ok default: llvm_unreachable("Can only insert epilog into returning blocks"); } diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 6d8e160..48b9bdf 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -590,22 +590,6 @@ RemoveBranch(MachineBasicBlock &MBB) const return 2; } -/// BlockHasNoFallThrough - Analyze if MachineBasicBlock does not -/// fall-through into its successor block. -bool MipsInstrInfo:: -BlockHasNoFallThrough(const MachineBasicBlock &MBB) const -{ - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case Mips::RET: // Return. - case Mips::JR: // Indirect branch. - case Mips::J: // Uncond branch. - return true; - default: return false; - } -} - /// ReverseBranchCondition - Return the inverse opcode of the /// specified Branch instruction. 
bool MipsInstrInfo:: diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 249d3de..ab8dc59 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -232,7 +232,6 @@ public: return 0; } - virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 0083598..af7d812 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -740,18 +740,6 @@ bool PPCInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, } -bool PPCInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case PPC::BLR: // Return. - case PPC::B: // Uncond branch. - case PPC::BCTR: // Indirect branch. - return true; - default: return false; - } -} - bool PPCInstrInfo:: ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { assert(Cond.size() == 2 && "Invalid PPC branch opcode!"); diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index bb0dc15a..57facac 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -143,7 +143,6 @@ public: virtual bool canFoldMemoryOperand(const MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops) const; - virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 2d8a687..e1772c2 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -801,8 +801,21 @@ void bar(unsigned n) { true(); } -I think this basically amounts to a dag combine to simplify comparisons against -multiply hi's into a comparison against the mullo. +This is equivalent to the following, where 2863311531 is the multiplicative +inverse of 3, and 1431655766 is ((2^32)-1)/3+1: +void bar(unsigned n) { + if (n * 2863311531U < 1431655766U) + true(); +} + +The same transformation can work with an even modulo with the addition of a +rotate: rotate the result of the multiply to the right by the number of bits +which need to be zero for the condition to be true, and shrink the compare RHS +by the same amount. Unless the target supports rotates, though, that +transformation probably isn't worthwhile. + +The transformation can also easily be made to work with non-zero equality +comparisons: just transform, for example, "n % 3 == 1" to "(n-1) % 3 == 0". //===---------------------------------------------------------------------===// @@ -823,20 +836,6 @@ int main() { //===---------------------------------------------------------------------===// -Instcombine will merge comparisons like (x >= 10) && (x < 20) by producing (x - -10) u< 10, but only when the comparisons have matching sign. - -This could be converted with a similiar technique. (PR1941) - -define i1 @test(i8 %x) { - %A = icmp uge i8 %x, 5 - %B = icmp slt i8 %x, 20 - %C = and i1 %A, %B - ret i1 %C -} - -//===---------------------------------------------------------------------===// - These functions perform the same computation, but produce different assembly. 
define i8 @select(i8 %x) readnone nounwind { @@ -884,18 +883,6 @@ The expression should optimize to something like //===---------------------------------------------------------------------===// -From GCC Bug 15241: -unsigned int -foo (unsigned int a, unsigned int b) -{ - if (a <= 7 && b <= 7) - baz (); -} -Should combine to "(a|b) <= 7". Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - From GCC Bug 3756: int pn (int n) @@ -907,19 +894,6 @@ Should combine to (n >> 31) | 1. Currently not optimized with "clang //===---------------------------------------------------------------------===// -From GCC Bug 28685: -int test(int a, int b) -{ - int lt = a < b; - int eq = a == b; - - return (lt || eq); -} -Should combine to "a <= b". Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts | llc". - -//===---------------------------------------------------------------------===// - void a(int variable) { if (variable == 4 || variable == 6) @@ -993,12 +967,6 @@ Should combine to 0. Currently not optimized with "clang //===---------------------------------------------------------------------===// -int a(unsigned char* b) {return *b > 99;} -There's an unnecessary zext in the generated code with "clang --emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - int a(unsigned b) {return ((b << 31) | (b << 30)) >> 31;} Should be combined to "((b >> 1) | b) & 1". Currently not optimized with "clang -emit-llvm-bc | opt -std-compile-opts". @@ -1011,12 +979,6 @@ Should combine to "x | (y & 3)". Currently not optimized with "clang //===---------------------------------------------------------------------===// -unsigned a(unsigned a) {return ((a | 1) & 3) | (a & -4);} -Should combine to "a | 1". Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - int a(int a, int b, int c) {return (~a & c) | ((c|a) & b);} Should fold to "(~a & c) | (a & b)". Currently not optimized with "clang -emit-llvm-bc | opt -std-compile-opts". @@ -1704,3 +1666,50 @@ need all but the bottom two bits from %A, and if we gave that mask to SDB it would delete the or instruction for us. //===---------------------------------------------------------------------===// + +FunctionAttrs is not marking this function as readnone (just readonly): +$ clang t.c -emit-llvm -S -o - -O0 | opt -mem2reg -S -functionattrs + +int t(int a, int b, int c) { + int *p; + if (a) + p = &a; + else + p = &c; + return *p; +} + +This is because we codegen this to: + +define i32 @t(i32 %a, i32 %b, i32 %c) nounwind readonly ssp { +entry: + %a.addr = alloca i32 ; <i32*> [#uses=3] + %c.addr = alloca i32 ; <i32*> [#uses=2] +... + +if.end: + %p.0 = phi i32* [ %a.addr, %if.then ], [ %c.addr, %if.else ] + %tmp2 = load i32* %p.0 ; <i32> [#uses=1] + ret i32 %tmp2 +} + +And functionattrs doesn't realize that the p.0 load points to function local +memory. + +Also, functionattrs doesn't know about memcpy/memset. 
This function should be +marked readnone, since it only twiddles local memory, but functionattrs doesn't +handle memset/memcpy/memmove aggressively: + +struct X { int *p; int *q; }; +int foo() { + int i = 0, j = 1; + struct X x, y; + int **p; + y.p = &i; + x.q = &j; + p = __builtin_memcpy (&x, &y, sizeof (int *)); + return **p; +} + +//===---------------------------------------------------------------------===// + diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index d82d928..5fa7e8c 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -402,18 +402,6 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { return false; } -bool SystemZInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB)const{ - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case SystemZ::RET: // Return. - case SystemZ::JMP: // Uncond branch. - case SystemZ::JMPr: // Indirect branch. - return true; - default: return false; - } -} - bool SystemZInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { const TargetInstrDesc &TID = MI->getDesc(); if (!TID.isTerminator()) return false; @@ -454,8 +442,8 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, } // If the block has any instructions after a JMP, delete them. - while (next(I) != MBB.end()) - next(I)->eraseFromParent(); + while (llvm::next(I) != MBB.end()) + llvm::next(I)->eraseFromParent(); Cond.clear(); FBB = 0; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index e16d704..ef3b39e 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -89,7 +89,6 @@ public: const std::vector<CalleeSavedInfo> &CSI) const; bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; - virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const; virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 4d1c01f..1318195 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -247,7 +247,7 @@ void SystemZRegisterInfo::emitPrologue(MachineFunction &MF) const { .addReg(SystemZ::R15D); // Mark the FramePtr as live-in in every block except the entry. 
- for (MachineFunction::iterator I = next(MF.begin()), E = MF.end(); + for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) I->addLiveIn(SystemZ::R11D); diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index fc71bc3..9434a19 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -117,14 +117,6 @@ TargetAlignElem::operator==(const TargetAlignElem &rhs) const { && TypeBitWidth == rhs.TypeBitWidth); } -std::ostream & -TargetAlignElem::dump(std::ostream &os) const { - return os << AlignType - << TypeBitWidth - << ":" << (int) (ABIAlign * 8) - << ":" << (int) (PrefAlign * 8); -} - const TargetAlignElem TargetData::InvalidAlignmentElem = TargetAlignElem::get((AlignTypeEnum) -1, 0, 0, 0); @@ -323,11 +315,10 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, : Alignments[BestMatchIdx].PrefAlign; } -typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy; - -namespace llvm { +namespace { class StructLayoutMap : public AbstractTypeUser { + typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy; LayoutInfoTy LayoutInfo; /// refineAbstractType - The callback method invoked when an abstract type is @@ -336,19 +327,11 @@ class StructLayoutMap : public AbstractTypeUser { /// virtual void refineAbstractType(const DerivedType *OldTy, const Type *) { - const StructType *STy = dyn_cast<const StructType>(OldTy); - if (!STy) { - OldTy->removeAbstractTypeUser(this); - return; - } - - StructLayout *SL = LayoutInfo[STy]; - if (SL) { - SL->~StructLayout(); - free(SL); - LayoutInfo[STy] = NULL; - } - + const StructType *STy = cast<const StructType>(OldTy); + LayoutInfoTy::iterator Iter = LayoutInfo.find(STy); + Iter->second->~StructLayout(); + free(Iter->second); + LayoutInfo.erase(Iter); OldTy->removeAbstractTypeUser(this); } @@ -358,70 +341,43 @@ class StructLayoutMap : public AbstractTypeUser { /// This method notifies ATU's when this occurs for a type. /// virtual void typeBecameConcrete(const DerivedType *AbsTy) { - const StructType *STy = dyn_cast<const StructType>(AbsTy); - if (!STy) { - AbsTy->removeAbstractTypeUser(this); - return; - } - - StructLayout *SL = LayoutInfo[STy]; - if (SL) { - SL->~StructLayout(); - free(SL); - LayoutInfo[STy] = NULL; - } - + const StructType *STy = cast<const StructType>(AbsTy); + LayoutInfoTy::iterator Iter = LayoutInfo.find(STy); + Iter->second->~StructLayout(); + free(Iter->second); + LayoutInfo.erase(Iter); AbsTy->removeAbstractTypeUser(this); } - bool insert(const Type *Ty) { - if (Ty->isAbstract()) - Ty->addAbstractTypeUser(this); - return true; - } - public: virtual ~StructLayoutMap() { // Remove any layouts. 
for (LayoutInfoTy::iterator - I = LayoutInfo.begin(), E = LayoutInfo.end(); I != E; ++I) - if (StructLayout *SL = I->second) { - SL->~StructLayout(); - free(SL); - } - } + I = LayoutInfo.begin(), E = LayoutInfo.end(); I != E; ++I) { + const Type *Key = I->first; + StructLayout *Value = I->second; - inline LayoutInfoTy::iterator begin() { - return LayoutInfo.begin(); - } - inline LayoutInfoTy::iterator end() { - return LayoutInfo.end(); - } - inline LayoutInfoTy::const_iterator begin() const { - return LayoutInfo.begin(); - } - inline LayoutInfoTy::const_iterator end() const { - return LayoutInfo.end(); - } + if (Key->isAbstract()) + Key->removeAbstractTypeUser(this); - LayoutInfoTy::iterator find(const StructType *&Val) { - return LayoutInfo.find(Val); - } - LayoutInfoTy::const_iterator find(const StructType *&Val) const { - return LayoutInfo.find(Val); + Value->~StructLayout(); + free(Value); + } } - bool erase(const StructType *&Val) { - return LayoutInfo.erase(Val); - } - bool erase(LayoutInfoTy::iterator I) { - return LayoutInfo.erase(I); + void InvalidateEntry(const StructType *Ty) { + LayoutInfoTy::iterator I = LayoutInfo.find(Ty); + if (I == LayoutInfo.end()) return; + + I->second->~StructLayout(); + free(I->second); + LayoutInfo.erase(I); + + if (Ty->isAbstract()) + Ty->removeAbstractTypeUser(this); } - StructLayout *&operator[](const Type *Key) { - const StructType *STy = dyn_cast<const StructType>(Key); - assert(STy && "Trying to access the struct layout map with a non-struct!"); - insert(STy); + StructLayout *&operator[](const StructType *STy) { return LayoutInfo[STy]; } @@ -429,17 +385,18 @@ public: virtual void dump() const {} }; -} // end namespace llvm +} // end anonymous namespace TargetData::~TargetData() { - delete LayoutMap; + delete static_cast<StructLayoutMap*>(LayoutMap); } const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { if (!LayoutMap) LayoutMap = new StructLayoutMap(); - StructLayout *&SL = (*LayoutMap)[Ty]; + StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap); + StructLayout *&SL = (*STM)[Ty]; if (SL) return SL; // Otherwise, create the struct layout. Because it is variable length, we @@ -453,6 +410,10 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { SL = L; new (L) StructLayout(Ty, *this); + + if (Ty->isAbstract()) + Ty->addAbstractTypeUser(STM); + return L; } @@ -463,15 +424,10 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const { if (!LayoutMap) return; // No cache. 
- DenseMap<const StructType*, StructLayout*>::iterator I = LayoutMap->find(Ty); - if (I == LayoutMap->end()) return; - - I->second->~StructLayout(); - free(I->second); - LayoutMap->erase(I); + StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap); + STM->InvalidateEntry(Ty); } - std::string TargetData::getStringRepresentation() const { std::string Result; raw_string_ostream OS(Result); diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index a167118..684c61f 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -62,11 +62,6 @@ MCCodeEmitter *createX86MCCodeEmitter(const Target &, TargetMachine &TM); /// FunctionPass *createEmitX86CodeToMemory(); -/// createX86MaxStackAlignmentCalculatorPass - This function returns a pass -/// which calculates maximal stack alignment required for function -/// -FunctionPass *createX86MaxStackAlignmentCalculatorPass(); - extern Target TheX86_32Target, TheX86_64Target; } // End llvm namespace diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h index afd5525..5017af2 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.h +++ b/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -16,6 +16,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/ADT/StringSet.h" +#include "X86MachineFunctionInfo.h" namespace llvm { class X86MachineFunctionInfo; diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index d77f039..12d3d04 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -64,11 +64,18 @@ def RetCC_X86_32_C : CallingConv<[ // X86-32 FastCC return-value convention. def RetCC_X86_32_Fast : CallingConv<[ // The X86-32 fastcc returns 1, 2, or 3 FP values in XMM0-2 if the target has - // SSE2, otherwise it is the the C calling conventions. + // SSE2. // This can happen when a float, 2 x float, or 3 x float vector is split by // target lowering, and is returned in 1-3 sse regs. CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, + + // For integers, ECX can be used as an extra return register + CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>, + CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>, + CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>, + + // Otherwise, it is the same as the common X86 calling convention. 
CCDelegateTo<RetCC_X86Common> ]>; diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index a2fe9b0..044bd4b 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -289,7 +289,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { while (Start != BB.begin() && prior(Start) != PrevI) --Start; errs() << "Inserted instructions:\n\t"; Start->print(errs(), &MF.getTarget()); - while (++Start != next(I)) {} + while (++Start != llvm::next(I)) {} } dumpStack(); ); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index d80b8ec..0517b56 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -595,6 +595,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,Expand); + setOperationAction(ISD::TRUNCATE, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand); + for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) + setTruncStoreAction((MVT::SimpleValueType)VT, + (MVT::SimpleValueType)InnerVT, Expand); + setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); + setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); } // FIXME: In order to prevent SSE instructions being expanded to MMX ones @@ -671,8 +683,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom); - setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand); - setOperationAction(ISD::TRUNCATE, MVT::v8i8, Expand); setOperationAction(ISD::SELECT, MVT::v8i8, Promote); setOperationAction(ISD::SELECT, MVT::v4i16, Promote); setOperationAction(ISD::SELECT, MVT::v2i32, Promote); @@ -3344,6 +3354,82 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, } SDValue +X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, + SelectionDAG &DAG) { + + // Check if the scalar load can be widened into a vector load. And if + // the address is "base + cst" see if the cst can be "absorbed" into + // the shuffle mask. + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) { + SDValue Ptr = LD->getBasePtr(); + if (!ISD::isNormalLoad(LD) || LD->isVolatile()) + return SDValue(); + EVT PVT = LD->getValueType(0); + if (PVT != MVT::i32 && PVT != MVT::f32) + return SDValue(); + + int FI = -1; + int64_t Offset = 0; + if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) { + FI = FINode->getIndex(); + Offset = 0; + } else if (Ptr.getOpcode() == ISD::ADD && + isa<ConstantSDNode>(Ptr.getOperand(1)) && + isa<FrameIndexSDNode>(Ptr.getOperand(0))) { + FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); + Offset = Ptr.getConstantOperandVal(1); + Ptr = Ptr.getOperand(0); + } else { + return SDValue(); + } + + SDValue Chain = LD->getChain(); + // Make sure the stack object alignment is at least 16. 
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + if (DAG.InferPtrAlignment(Ptr) < 16) { + if (MFI->isFixedObjectIndex(FI)) { + // Can't change the alignment. Reference stack + offset explicitly + // if stack pointer is at least 16-byte aligned. + unsigned StackAlign = Subtarget->getStackAlignment(); + if (StackAlign < 16) + return SDValue(); + Offset = MFI->getObjectOffset(FI) + Offset; + SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, + getPointerTy()); + Ptr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, + DAG.getConstant(Offset & ~15, getPointerTy())); + Offset %= 16; + } else { + MFI->setObjectAlignment(FI, 16); + } + } + + // (Offset % 16) must be a multiple of 4. The address is then + // Ptr + (Offset & ~15). + if (Offset < 0) + return SDValue(); + if ((Offset % 16) & 3) + return SDValue(); + int64_t StartOffset = Offset & ~15; + if (StartOffset) + Ptr = DAG.getNode(ISD::ADD, Ptr.getDebugLoc(), Ptr.getValueType(), + Ptr,DAG.getConstant(StartOffset, Ptr.getValueType())); + + int EltNo = (Offset - StartOffset) >> 2; + int Mask[4] = { EltNo, EltNo, EltNo, EltNo }; + EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32; + SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0); + // Canonicalize it to a v4i32 shuffle. + V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + DAG.getVectorShuffle(MVT::v4i32, dl, V1, + DAG.getUNDEF(MVT::v4i32), &Mask[0])); + } + + return SDValue(); +} + +SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); // All zeros are handled with pxor, all ones are handled with pcmpeqd. @@ -3486,8 +3572,19 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { } // Splat is obviously ok. Let legalizer expand it to a shuffle. - if (Values.size() == 1) + if (Values.size() == 1) { + if (EVTBits == 32) { + // Instead of a shuffle like this: + // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0> + // Check if it's possible to issue this instead. + // shuffle (vload ptr), undef, <1, 1, 1, 1> + unsigned Idx = CountTrailingZeros_32(NonZeros); + SDValue Item = Op.getOperand(Idx); + if (Op.getNode()->isOnlyUserOf(Item.getNode())) + return LowerAsSplatVectorLoad(Item, VT, dl, DAG); + } return SDValue(); + } // A vector full of immediates; various special cases are already // handled, so this is best done with a single constant-pool load. @@ -4278,7 +4375,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { unsigned ShAmt = 0; SDValue ShVal; bool isShift = getSubtarget()->hasSSE2() && - isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); + isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); if (isShift && ShVal.hasOneUse()) { // If the shifted value has multiple uses, it may be cheaper to use // v_set0 + movlhps or movhlps, etc. @@ -4815,6 +4912,7 @@ static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, unsigned char OperandFlags) { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); DebugLoc dl = GA->getDebugLoc(); SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), @@ -4828,6 +4926,10 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, SDValue Ops[] = { Chain, TGA }; Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2); } + + // TLSADDR will be codegen'ed as a call.
Inform MFI that the function has calls. + MFI->setHasCalls(true); + SDValue Flag = Chain.getValue(1); return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag); } @@ -5648,6 +5750,17 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { return SDValue(); SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG); + + // Use sbb x, x to materialize carry bit into a GPR. + // FIXME: Temporarily disabled since it breaks self-hosting. It's apparently + // miscompiling ARMISelDAGToDAG.cpp. + if (0 && !isFP && X86CC == X86::COND_B) { + return DAG.getNode(ISD::AND, dl, MVT::i8, + DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8, + DAG.getConstant(X86CC, MVT::i8), Cond), + DAG.getConstant(1, MVT::i8)); + } + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(X86CC, MVT::i8), Cond); } @@ -5800,9 +5913,18 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { Cond = NewCond; } + // Look past (and (setcc_carry (cmp ...)), 1). + if (Cond.getOpcode() == ISD::AND && + Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1)); + if (C && C->getAPIntValue() == 1) + Cond = Cond.getOperand(0); + } + // If condition flag is set by an X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. - if (Cond.getOpcode() == X86ISD::SETCC) { + if (Cond.getOpcode() == X86ISD::SETCC || + Cond.getOpcode() == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -5885,9 +6007,18 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { Cond = LowerXALUO(Cond, DAG); #endif + // Look past (and (setcc_carry (cmp ...)), 1). + if (Cond.getOpcode() == ISD::AND && + Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1)); + if (C && C->getAPIntValue() == 1) + Cond = Cond.getOperand(0); + } + // If condition flag is set by an X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. - if (Cond.getOpcode() == X86ISD::SETCC) { + if (Cond.getOpcode() == X86ISD::SETCC || + Cond.getOpcode() == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -7274,6 +7405,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::COMI: return "X86ISD::COMI"; case X86ISD::UCOMI: return "X86ISD::UCOMI"; case X86ISD::SETCC: return "X86ISD::SETCC"; + case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY"; case X86ISD::CMOV: return "X86ISD::CMOV"; case X86ISD::BRCOND: return "X86ISD::BRCOND"; case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; @@ -8327,16 +8459,6 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N, return TargetLowering::isGAPlusOffset(N, GA, Offset); } -static bool isBaseAlignmentOfN(unsigned N, SDNode *Base, - const TargetLowering &TLI) { - GlobalValue *GV; - int64_t Offset = 0; - if (TLI.isGAPlusOffset(Base, GV, Offset)) - return (GV->getAlignment() >= N && (Offset % N) == 0); - // DAG combine handles the stack object case.
- return false; -} - static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, EVT EltVT, LoadSDNode *&LDBase, unsigned &LastLoadedElt, @@ -8366,7 +8488,7 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, continue; LoadSDNode *LD = cast<LoadSDNode>(Elt); - if (!TLI.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i, MFI)) + if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i)) return false; LastLoadedElt = i; } @@ -8399,7 +8521,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); if (LastLoadedElt == NumElems - 1) { - if (isBaseAlignmentOfN(16, LD->getBasePtr().getNode(), TLI)) + if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16) return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), LD->isVolatile()); @@ -8858,11 +8980,42 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N0.getValueType(); + + // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2)) + // since the result of setcc_c is all zero's or all ones. + if (N1C && N0.getOpcode() == ISD::AND && + N0.getOperand(1).getOpcode() == ISD::Constant) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == X86ISD::SETCC_CARRY || + ((N00.getOpcode() == ISD::ANY_EXTEND || + N00.getOpcode() == ISD::ZERO_EXTEND) && + N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) { + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + APInt ShAmt = N1C->getAPIntValue(); + Mask = Mask.shl(ShAmt); + if (Mask != 0) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + N00, DAG.getConstant(Mask, VT)); + } + } + + return SDValue(); +} /// PerformShiftCombine - Transforms vector shift nodes to use vector shifts /// when possible. static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + if (!VT.isVector() && VT.isInteger() && + N->getOpcode() == ISD::SHL) + return PerformSHLCombine(N, DAG); + // On X86 with SSE2 support, we can transform this to a vector shift if // all elements are shifted by the same amount. We can't do this in legalize // because the a constant vector is typically transformed to a constant pool @@ -8870,7 +9023,6 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, if (!Subtarget->hasSSE2()) return SDValue(); - EVT VT = N->getValueType(0); if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16) return SDValue(); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 7b4ab62..64bc70c 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -118,6 +118,10 @@ namespace llvm { /// operand produced by a CMP instruction. SETCC, + // Same as SETCC except it's materialized with a sbb and the value is all + // one's or all zero's. + SETCC_CARRY, + /// X86 conditional moves. Operand 0 and operand 1 are the two values /// to select from. Operand 2 is the condition code, and operand 3 is the /// flag operand produced by a CMP or TEST instruction. 
It also writes a @@ -626,7 +630,9 @@ namespace llvm { std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned); - + + SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, + SelectionDAG &DAG); SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG); SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG); SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index b5fa862..b6a2c05 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -1333,6 +1333,15 @@ def CMOVNO64rm : RI<0x41, MRMSrcMem, // if !overflow, GR64 = [mem64] X86_COND_NO, EFLAGS))]>, TB; } // isTwoAddress +// Use sbb to materialize carry flag into a GPR. +let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in +def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), + "sbb{q}\t$dst, $dst", + [(set GR64:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>; + +def : Pat<(i64 (anyext (X86setcc_c X86_COND_B, EFLAGS))), + (SETB_C64r)>; + //===----------------------------------------------------------------------===// // Conversion Instructions... // diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index a37013d..1947d35 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -783,12 +783,14 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, if ((Reg = isLoadFromStackSlot(MI, FrameIndex))) return Reg; // Check for post-frame index elimination operations - return hasLoadFromStackSlot(MI, FrameIndex); + const MachineMemOperand *Dummy; + return hasLoadFromStackSlot(MI, Dummy, FrameIndex); } return 0; } bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, int &FrameIndex) const { for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), oe = MI->memoperands_end(); @@ -798,6 +800,7 @@ bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, if (const FixedStackPseudoSourceValue *Value = dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { FrameIndex = Value->getFrameIndex(); + MMO = *o; return true; } } @@ -819,12 +822,14 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, if ((Reg = isStoreToStackSlot(MI, FrameIndex))) return Reg; // Check for post-frame index elimination operations - return hasStoreToStackSlot(MI, FrameIndex); + const MachineMemOperand *Dummy; + return hasStoreToStackSlot(MI, Dummy, FrameIndex); } return 0; } bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, int &FrameIndex) const { for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), oe = MI->memoperands_end(); @@ -834,6 +839,7 @@ bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI, if (const FixedStackPseudoSourceValue *Value = dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { FrameIndex = Value->getFrameIndex(); + MMO = *o; return true; } } @@ -1052,6 +1058,112 @@ static bool hasLiveCondCodeDef(MachineInstr *MI) { return false; } +/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when +/// 16-bit LEA is disabled, use 32-bit LEA to form 3-address code by promoting +/// to a 32-bit superregister and then truncating back down to a 16-bit +/// subregister. 
+MachineInstr * +X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, + MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const { + MachineInstr *MI = MBBI; + unsigned Dest = MI->getOperand(0).getReg(); + unsigned Src = MI->getOperand(1).getReg(); + bool isDead = MI->getOperand(0).isDead(); + bool isKill = MI->getOperand(1).isKill(); + + unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() + ? X86::LEA64_32r : X86::LEA32r; + MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo(); + unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass); + unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass); + + // Build and insert into an implicit UNDEF value. This is OK because + // well be shifting and then extracting the lower 16-bits. + // This has the potential to cause partial register stall. e.g. + // movw (%rbp,%rcx,2), %dx + // leal -65(%rdx), %esi + // But testing has shown this *does* help performance in 64-bit mode (at + // least on modern x86 machines). + BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg); + MachineInstr *InsMI = + BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg) + .addReg(leaInReg) + .addReg(Src, getKillRegState(isKill)) + .addImm(X86::SUBREG_16BIT); + + MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(), + get(Opc), leaOutReg); + switch (MIOpc) { + default: + llvm_unreachable(0); + break; + case X86::SHL16ri: { + unsigned ShAmt = MI->getOperand(2).getImm(); + MIB.addReg(0).addImm(1 << ShAmt) + .addReg(leaInReg, RegState::Kill).addImm(0); + break; + } + case X86::INC16r: + case X86::INC64_16r: + addLeaRegOffset(MIB, leaInReg, true, 1); + break; + case X86::DEC16r: + case X86::DEC64_16r: + addLeaRegOffset(MIB, leaInReg, true, -1); + break; + case X86::ADD16ri: + case X86::ADD16ri8: + addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm()); + break; + case X86::ADD16rr: { + unsigned Src2 = MI->getOperand(2).getReg(); + bool isKill2 = MI->getOperand(2).isKill(); + unsigned leaInReg2 = 0; + MachineInstr *InsMI2 = 0; + if (Src == Src2) { + // ADD16rr %reg1028<kill>, %reg1028 + // just a single insert_subreg. + addRegReg(MIB, leaInReg, true, leaInReg, false); + } else { + leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass); + // Build and insert into an implicit UNDEF value. This is OK because + // well be shifting and then extracting the lower 16-bits. + BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2); + InsMI2 = + BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2) + .addReg(leaInReg2) + .addReg(Src2, getKillRegState(isKill2)) + .addImm(X86::SUBREG_16BIT); + addRegReg(MIB, leaInReg, true, leaInReg2, true); + } + if (LV && isKill2 && InsMI2) + LV->replaceKillInstruction(Src2, MI, InsMI2); + break; + } + } + + MachineInstr *NewMI = MIB; + MachineInstr *ExtMI = + BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG)) + .addReg(Dest, RegState::Define | getDeadRegState(isDead)) + .addReg(leaOutReg, RegState::Kill) + .addImm(X86::SUBREG_16BIT); + + if (LV) { + // Update live variables + LV->getVarInfo(leaInReg).Kills.push_back(NewMI); + LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI); + if (isKill) + LV->replaceKillInstruction(Src, MI, InsMI); + if (isDead) + LV->replaceKillInstruction(Dest, MI, ExtMI); + } + + return ExtMI; +} + /// convertToThreeAddress - This method must be implemented by targets that /// set the M_CONVERTIBLE_TO_3_ADDR flag. 
When this flag is set, the target /// may be able to convert a two-address instruction into a true @@ -1077,7 +1189,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineInstr *NewMI = NULL; // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When // we have better subtarget support, enable the 16-bit LEA generation here. + // 16-bit LEA is also slow on Core2. bool DisableLEA16 = true; + bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); unsigned MIOpc = MI->getOpcode(); switch (MIOpc) { @@ -1116,8 +1230,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned ShAmt = MI->getOperand(2).getImm(); if (ShAmt == 0 || ShAmt >= 4) return 0; - unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() ? - X86::LEA64_32r : X86::LEA32r; + unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)) .addReg(0).addImm(1 << ShAmt) @@ -1131,51 +1244,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned ShAmt = MI->getOperand(2).getImm(); if (ShAmt == 0 || ShAmt >= 4) return 0; - if (DisableLEA16) { - // If 16-bit LEA is disabled, use 32-bit LEA via subregisters. - MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo(); - unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() - ? X86::LEA64_32r : X86::LEA32r; - unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass); - unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass); - - // Build and insert into an implicit UNDEF value. This is OK because - // well be shifting and then extracting the lower 16-bits. - BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg); - MachineInstr *InsMI = - BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg) - .addReg(leaInReg) - .addReg(Src, getKillRegState(isKill)) - .addImm(X86::SUBREG_16BIT); - - NewMI = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg) - .addReg(0).addImm(1 << ShAmt) - .addReg(leaInReg, RegState::Kill) - .addImm(0); - - MachineInstr *ExtMI = - BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG)) - .addReg(Dest, RegState::Define | getDeadRegState(isDead)) - .addReg(leaOutReg, RegState::Kill) - .addImm(X86::SUBREG_16BIT); - - if (LV) { - // Update live variables - LV->getVarInfo(leaInReg).Kills.push_back(NewMI); - LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI); - if (isKill) - LV->replaceKillInstruction(Src, MI, InsMI); - if (isDead) - LV->replaceKillInstruction(Dest, MI, ExtMI); - } - return ExtMI; - } else { - NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) - .addReg(Dest, RegState::Define | getDeadRegState(isDead)) - .addReg(0).addImm(1 << ShAmt) - .addReg(Src, getKillRegState(isKill)) - .addImm(0); - } + if (DisableLEA16) + return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; + NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addReg(Dest, RegState::Define | getDeadRegState(isDead)) + .addReg(0).addImm(1 << ShAmt) + .addReg(Src, getKillRegState(isKill)) + .addImm(0); break; } default: { @@ -1185,7 +1260,6 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (hasLiveCondCodeDef(MI)) return 0; - bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); switch (MIOpc) { default: return 0; case X86::INC64r: @@ -1202,7 +1276,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } case X86::INC16r: case X86::INC64_16r: - if (DisableLEA16) return 0; + if (DisableLEA16) + return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) .addReg(Dest, RegState::Define | @@ -1223,7 +1298,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } case X86::DEC16r: case X86::DEC64_16r: - if (DisableLEA16) return 0; + if (DisableLEA16) + return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) .addReg(Dest, RegState::Define | @@ -1246,7 +1322,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, break; } case X86::ADD16rr: { - if (DisableLEA16) return 0; + if (DisableLEA16) + return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); unsigned Src2 = MI->getOperand(2).getReg(); bool isKill2 = MI->getOperand(2).isKill(); @@ -1261,56 +1338,32 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD64ri32: case X86::ADD64ri8: assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - if (MI->getOperand(2).isImm()) - NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, MI->getOperand(2).getImm()); + NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), + Src, isKill, MI->getOperand(2).getImm()); break; case X86::ADD32ri: - case X86::ADD32ri8: + case X86::ADD32ri8: { assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - if (MI->getOperand(2).isImm()) { - unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; - NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), + unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; + NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), Src, isKill, MI->getOperand(2).getImm()); - } break; + } case X86::ADD16ri: case X86::ADD16ri8: - if (DisableLEA16) return 0; + if (DisableLEA16) + return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - if (MI->getOperand(2).isImm()) - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, MI->getOperand(2).getImm()); - break; - case X86::SHL16ri: - if (DisableLEA16) return 0; - case X86::SHL32ri: - case X86::SHL64ri: { - assert(MI->getNumOperands() >= 3 && MI->getOperand(2).isImm() && - "Unknown shl instruction!"); - unsigned ShAmt = MI->getOperand(2).getImm(); - if (ShAmt == 1 || ShAmt == 2 || ShAmt == 3) { - X86AddressMode AM; - AM.Scale = 1 << ShAmt; - AM.IndexReg = Src; - unsigned Opc = MIOpc == X86::SHL64ri ? X86::LEA64r - : (MIOpc == X86::SHL32ri - ? (is64Bit ? X86::LEA64_32r : X86::LEA32r) : X86::LEA16r); - NewMI = addFullAddress(BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), AM); - if (isKill) - NewMI->getOperand(3).setIsKill(true); - } + NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), + Src, isKill, MI->getOperand(2).getImm()); break; } - } } } @@ -1571,14 +1624,17 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock::iterator I = MBB.end(); while (I != MBB.begin()) { --I; - // Working from the bottom, when we see a non-terminator - // instruction, we're done. + + // Working from the bottom, when we see a non-terminator instruction, we're + // done. if (!isBrAnalysisUnpredicatedTerminator(I, *this)) break; - // A terminator that isn't a branch can't easily be handled - // by this analysis. + + // A terminator that isn't a branch can't easily be handled by this + // analysis. if (!I->getDesc().isBranch()) return true; + // Handle unconditional branches. if (I->getOpcode() == X86::JMP) { if (!AllowModify) { @@ -1587,10 +1643,12 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, } // If the block has any instructions after a JMP, delete them. - while (next(I) != MBB.end()) - next(I)->eraseFromParent(); + while (llvm::next(I) != MBB.end()) + llvm::next(I)->eraseFromParent(); + Cond.clear(); FBB = 0; + // Delete the JMP if it's equivalent to a fall-through. if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { TBB = 0; @@ -1598,14 +1656,17 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, I = MBB.end(); continue; } + // TBB is used to indicate the unconditinal destination. TBB = I->getOperand(0).getMBB(); continue; } + // Handle conditional branches. X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode()); if (BranchCode == X86::COND_INVALID) return true; // Can't handle indirect branch. + // Working from the bottom, handle the first conditional branch. if (Cond.empty()) { FBB = TBB; @@ -1613,24 +1674,26 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, Cond.push_back(MachineOperand::CreateImm(BranchCode)); continue; } - // Handle subsequent conditional branches. Only handle the case - // where all conditional branches branch to the same destination - // and their condition opcodes fit one of the special - // multi-branch idioms. + + // Handle subsequent conditional branches. Only handle the case where all + // conditional branches branch to the same destination and their condition + // opcodes fit one of the special multi-branch idioms. 
assert(Cond.size() == 1); assert(TBB); - // Only handle the case where all conditional branches branch to - // the same destination. + + // Only handle the case where all conditional branches branch to the same + // destination. if (TBB != I->getOperand(0).getMBB()) return true; - X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm(); + // If the conditions are the same, we can leave them alone. + X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm(); if (OldBranchCode == BranchCode) continue; - // If they differ, see if they fit one of the known patterns. - // Theoretically we could handle more patterns here, but - // we shouldn't expect to see them if instruction selection - // has done a reasonable job. + + // If they differ, see if they fit one of the known patterns. Theoretically, + // we could handle more patterns here, but we shouldn't expect to see them + // if instruction selection has done a reasonable job. if ((OldBranchCode == X86::COND_NP && BranchCode == X86::COND_E) || (OldBranchCode == X86::COND_E && @@ -1643,6 +1706,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, BranchCode = X86::COND_NE_OR_P; else return true; + // Update the MachineOperand. Cond[0].setImm(BranchCode); } @@ -2713,27 +2777,6 @@ unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, return I->second.first; } -bool X86InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case X86::TCRETURNri: - case X86::TCRETURNdi: - case X86::RET: // Return. - case X86::RETI: - case X86::TAILJMPd: - case X86::TAILJMPr: - case X86::TAILJMPm: - case X86::JMP: // Uncond branch. - case X86::JMP32r: // Indirect branch. - case X86::JMP64r: // Indirect branch (64-bit). - case X86::JMP32m: // Indirect branch through mem. - case X86::JMP64m: // Indirect branch through mem (64-bit). - return true; - default: return false; - } -} - bool X86InstrInfo:: ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { assert(Cond.size() == 1 && "Invalid X86 branch condition!"); diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index c6daa25..b83441d 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -457,11 +457,14 @@ public: /// hasLoadFromStackSlot - If the specified machine instruction has /// a load from a stack slot, return true along with the FrameIndex - /// of the loaded stack slot. If not, return false. Unlike + /// of the loaded stack slot and the machine mem operand containing + /// the reference. If not, return false. Unlike /// isLoadFromStackSlot, this returns true for any instructions that /// loads from the stack. This is a hint only and may not catch all /// cases. - bool hasLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; + bool hasLoadFromStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination @@ -472,11 +475,13 @@ public: /// hasStoreToStackSlot - If the specified machine instruction has a /// store to a stack slot, return true along with the FrameIndex of - /// the loaded stack slot. If not, return false. Unlike - /// isStoreToStackSlot, this returns true for any instructions that - /// loads from the stack. This is a hint only and may not catch all - /// cases. 
- bool hasStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; + /// the loaded stack slot and the machine mem operand containing the + /// reference. If not, return false. Unlike isStoreToStackSlot, + /// this returns true for any instructions that loads from the + /// stack. This is a hint only and may not catch all cases. + bool hasStoreToStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const; bool isReallyTriviallyReMaterializable(const MachineInstr *MI, AliasAnalysis *AA) const; @@ -595,7 +600,6 @@ public: bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex = 0) const; - virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; @@ -633,6 +637,11 @@ public: unsigned getGlobalBaseReg(MachineFunction *MF) const; private: + MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc, + MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const; + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, unsigned OpNum, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 90ef1f4..3cc1853 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -87,6 +87,7 @@ def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>; def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond, [SDNPHasChain]>; def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>; +def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC>; def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas, [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore, @@ -816,7 +817,7 @@ def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), let neverHasSideEffects = 1 in def LEA16r : I<0x8D, MRMSrcMem, - (outs GR16:$dst), (ins i32mem:$src), + (outs GR16:$dst), (ins lea32mem:$src), "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize; let isReMaterializable = 1 in def LEA32r : I<0x8D, MRMSrcMem, @@ -3059,6 +3060,21 @@ let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags let Uses = [EFLAGS] in { +// Use sbb to materialize carry bit. 
+ +let Defs = [EFLAGS], isCodeGenOnly = 1 in { +def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), + "sbb{b}\t$dst, $dst", + [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; +def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), + "sbb{w}\t$dst, $dst", + [(set GR16:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>, + OpSize; +def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), + "sbb{l}\t$dst, $dst", + [(set GR32:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>; +} // isCodeGenOnly + def SETEr : I<0x94, MRM0r, (outs GR8 :$dst), (ins), "sete\t$dst", @@ -4169,6 +4185,12 @@ def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)), addr:$dst), (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; +// (anyext (setcc_carry)) -> (zext (setcc_carry)) +def : Pat<(i16 (anyext (X86setcc_c X86_COND_B, EFLAGS))), + (SETB_C16r)>; +def : Pat<(i32 (anyext (X86setcc_c X86_COND_B, EFLAGS))), + (SETB_C32r)>; + //===----------------------------------------------------------------------===// // EFLAGS-defining Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index dfdd4ce..62841f8 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2083,7 +2083,7 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v4i32 (pshufd:$src2 - (bc_v4i32(memopv2i64 addr:$src1)), + (bc_v4i32 (memopv2i64 addr:$src1)), (undef))))]>; } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 33852bd..d96aafd 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -423,21 +423,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Stack Frame Processing methods //===----------------------------------------------------------------------===// -static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) { - unsigned MaxAlign = 0; - - for (int i = FFI->getObjectIndexBegin(), - e = FFI->getObjectIndexEnd(); i != e; ++i) { - if (FFI->isDeadObjectIndex(i)) - continue; - - unsigned Align = FFI->getObjectAlignment(i); - MaxAlign = std::max(MaxAlign, Align); - } - - return MaxAlign; -} - /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. @@ -638,10 +623,7 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Calculate and set max stack object alignment early, so we can decide // whether we will need stack realignment (and thus FP). - unsigned MaxAlign = std::max(MFI->getMaxAlignment(), - calculateMaxStackAlignment(MFI)); - - MFI->setMaxAlignment(MaxAlign); + MFI->calculateMaxStackAlignment(); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); @@ -741,7 +723,7 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB, if (MBBI == MBB.end()) return; - MachineBasicBlock::iterator NI = next(MBBI); + MachineBasicBlock::iterator NI = llvm::next(MBBI); if (NI == MBB.end()) return; unsigned Opc = NI->getOpcode(); @@ -775,7 +757,7 @@ static int mergeSPUpdates(MachineBasicBlock &MBB, return 0; MachineBasicBlock::iterator PI = doMergeWithPrevious ? 
prior(MBBI) : MBBI; - MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : next(MBBI); + MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI); unsigned Opc = PI->getOpcode(); int Offset = 0; @@ -1001,7 +983,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { } // Mark the FramePtr as live-in in every block except the entry. - for (MachineFunction::iterator I = next(MF.begin()), E = MF.end(); + for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) I->addLiveIn(FramePtr); @@ -1482,45 +1464,3 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { } #include "X86GenRegisterInfo.inc" - -namespace { - struct MSAC : public MachineFunctionPass { - static char ID; - MSAC() : MachineFunctionPass(&ID) {} - - virtual bool runOnMachineFunction(MachineFunction &MF) { - MachineFrameInfo *FFI = MF.getFrameInfo(); - MachineRegisterInfo &RI = MF.getRegInfo(); - - // Calculate max stack alignment of all already allocated stack objects. - unsigned MaxAlign = calculateMaxStackAlignment(FFI); - - // Be over-conservative: scan over all vreg defs and find, whether vector - // registers are used. If yes - there is probability, that vector register - // will be spilled and thus stack needs to be aligned properly. - for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister; - RegNum < RI.getLastVirtReg(); ++RegNum) - MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment()); - - if (FFI->getMaxAlignment() == MaxAlign) - return false; - - FFI->setMaxAlignment(MaxAlign); - return true; - } - - virtual const char *getPassName() const { - return "X86 Maximal Stack Alignment Calculator"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } - }; - - char MSAC::ID = 0; -} - -FunctionPass* -llvm::createX86MaxStackAlignmentCalculatorPass() { return new MSAC(); } diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 661f560..75cdbad 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -367,5 +367,5 @@ bool X86Subtarget::enablePostRAScheduler( RegClassVector& CriticalPathRCs) const { Mode = TargetSubtarget::ANTIDEP_CRITICAL; CriticalPathRCs.clear(); - return OptLevel >= CodeGenOpt::Default; + return OptLevel >= CodeGenOpt::Aggressive; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 0cda8bc..0152121 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -163,7 +163,7 @@ bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { // Calculate and set max stack object alignment early, so we can decide // whether we will need stack realignment (and thus FP). - PM.add(createX86MaxStackAlignmentCalculatorPass()); + PM.add(createMaxStackAlignmentCalculatorPass()); return false; // -print-machineinstr shouldn't print after this. } diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index e616fe6..5a54844 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -453,26 +453,6 @@ bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } -/// BlockHasNoFallThrough - Analyse if MachineBasicBlock does not -/// fall-through into its successor block. 
-bool XCoreInstrInfo::
-BlockHasNoFallThrough(const MachineBasicBlock &MBB) const
-{
-  if (MBB.empty()) return false;
-
-  switch (MBB.back().getOpcode()) {
-  case XCore::RETSP_u6:  // Return.
-  case XCore::RETSP_lu6:
-  case XCore::BAU_1r:    // Indirect branch.
-  case XCore::BRFU_u6:   // Uncond branch.
-  case XCore::BRFU_lu6:
-  case XCore::BRBU_u6:
-  case XCore::BRBU_lu6:
-    return true;
-  default: return false;
-  }
-}
-
 /// ReverseBranchCondition - Return the inverse opcode of the
 /// specified Branch instruction.
 bool XCoreInstrInfo::
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 24230ac..3e0a765 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -87,8 +87,6 @@ public:
                                  MachineBasicBlock::iterator MI,
                                  const std::vector<CalleeSavedInfo> &CSI) const;
 
-  virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
-
   virtual bool ReverseBranchCondition(
     SmallVectorImpl<MachineOperand> &Cond) const;
 };
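The LowerAsSplatVectorLoad hunk above rewrites "load one 4-byte element, then splat it" into "load the containing 16-byte-aligned vector, then splat the right lane". The heart of it is the offset arithmetic. Below is a minimal standalone sketch of just that math in plain C++ rather than the SelectionDAG API; SplatLoad and planSplatLoad are illustrative names, not anything in the tree:

#include <cassert>
#include <cstdint>

// Given the byte offset of a 4-byte element from a 16-byte-aligned base,
// compute the aligned vector load offset and the lane to splat.
// Mirrors the patch: StartOffset = Offset & ~15; EltNo = (Offset - StartOffset) >> 2.
struct SplatLoad { int64_t StartOffset; int EltNo; };

static bool planSplatLoad(int64_t Offset, SplatLoad &Out) {
  if (Offset < 0) return false;          // the patch bails on negative offsets
  if ((Offset % 16) & 3) return false;   // element must sit on a 4-byte step
  Out.StartOffset = Offset & ~15;        // round down to the vector boundary
  Out.EltNo = (Offset - Out.StartOffset) >> 2;  // lane index, 0..3
  return true;
}

int main() {
  SplatLoad S;
  assert(planSplatLoad(20, S) && S.StartOffset == 16 && S.EltNo == 1);
  assert(!planSplatLoad(22, S));  // misaligned element: fall back to a shuffle
  return 0;
}

The "(Offset % 16) & 3" test is the patch's "(Offset % 16) must be multiple of 4" check: a lane of a v4i32/v4f32 load only exists at 4-byte steps within the 16-byte vector.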
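Several hunks above revolve around X86ISD::SETCC_CARRY, i.e. "sbb reg, reg", which yields all ones when the carry flag is set and all zeros otherwise. That all-zeros-or-all-ones property is exactly what makes the PerformSHLCombine fold (shl (and setcc_c, c1), c2) -> (and setcc_c, (c1 << c2)) legal; it does not hold for arbitrary values. A small self-checking sketch of the identity, in plain C++ with an illustrative setcc_carry helper:

#include <cassert>
#include <cstdint>

// SETCC_CARRY ("sbb r, r") materializes the carry flag as all ones
// when CF is set, all zeros otherwise.
static uint32_t setcc_carry(bool cf) { return cf ? ~0u : 0u; }

int main() {
  for (int cf = 0; cf <= 1; ++cf) {
    uint32_t x = setcc_carry(cf != 0);
    const uint32_t c1 = 1, c2 = 5;
    // fold (shl (and setcc_c, c1), c2) -> (and setcc_c, (c1 << c2));
    // valid only because x is 0 or ~0 (e.g. x = 2, c1 = 1, c2 = 5 breaks it).
    assert(((x & c1) << c2) == (x & (c1 << c2)));
  }
  return 0;
}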
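The new convertToThreeAddressWithLEA helper works around slow 16-bit LEAs by doing the arithmetic in a 32-bit register and keeping only the low half, tolerating garbage in the upper bits (hence the IMPLICIT_DEF + INSERT_SUBREG dance). A sketch of why that is sound for a 16-bit add, as ordinary C++ with an illustrative add16_via_lea32 helper; the assumption is that only the low 16 bits of the result are consumed, which EXTRACT_SUBREG guarantees:

#include <cassert>
#include <cstdint>

// Perform the 16-bit operation in a 32-bit register (as a 32-bit LEA would),
// then keep only the low 16 bits. The upper halves of the widened inputs are
// deliberately junk, modeling the undefined bits left by IMPLICIT_DEF.
static uint16_t add16_via_lea32(uint16_t a, uint16_t b) {
  uint32_t wa = 0xDEAD0000u | a;   // garbage upper half, defined lower half
  uint32_t wb = 0xBEEF0000u | b;
  uint32_t lea = wa + wb;          // LEA32: full 32-bit address arithmetic
  return (uint16_t)lea;            // EXTRACT_SUBREG: low 16 bits only
}

int main() {
  assert(add16_via_lea32(0xFFFF, 1) == 0);       // wraps like a 16-bit add
  assert(add16_via_lea32(1234, 4321) == 5555);
  return 0;
}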
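The AnalyzeBranch cleanup above keeps the special handling of paired conditional branches such as COND_NE_OR_P. The pairing exists because after a floating-point ucomiss, "unordered" (a NaN operand) is reported in PF, so C's a != b compiles to two branches to the same target: jne plus jp. A hedged model of the two flags involved, in plain C++; ucomiss and Flags here are illustrative stand-ins for the hardware behavior:

#include <cassert>
#include <cmath>

// The two ucomiss results that matter here: ZF is set for "equal or
// unordered", PF is set for "unordered" (at least one NaN).
struct Flags { bool ZF, PF; };

static Flags ucomiss(float a, float b) {
  bool unordered = std::isnan(a) || std::isnan(b);
  return { unordered || a == b, unordered };
}

int main() {
  float nan = std::nanf("");
  Flags eq = ucomiss(1.0f, 1.0f), ne = ucomiss(1.0f, 2.0f),
        un = ucomiss(nan, nan);
  // COND_NE_OR_P ("jne or jp") is taken exactly when a != b in IEEE terms:
  assert(!(!eq.ZF || eq.PF));   // equal: no branch
  assert( (!ne.ZF || ne.PF));   // unequal: branch via jne
  assert( (!un.ZF || un.PF));   // unordered: branch via jp (NaN != NaN)
  return 0;
}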