Diffstat (limited to 'contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp')
-rw-r--r--	contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp	1295
1 file changed, 1076 insertions(+), 219 deletions(-)
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index cf8c5ba..e44e356 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14,7 +14,6 @@
 #define DEBUG_TYPE "arm-isel"
 #include "ARM.h"
-#include "ARMAddressingModes.h"
 #include "ARMCallingConv.h"
 #include "ARMConstantPoolValue.h"
 #include "ARMISelLowering.h"
@@ -24,6 +23,7 @@
 #include "ARMSubtarget.h"
 #include "ARMTargetMachine.h"
 #include "ARMTargetObjectFile.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
 #include "llvm/CallingConv.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
@@ -38,6 +38,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/SelectionDAG.h"
@@ -106,7 +107,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
   EVT ElemTy = VT.getVectorElementType();
   if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
-    setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
+    setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
   setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
   if (ElemTy != MVT::i32) {
     setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
@@ -178,6 +179,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   RegInfo = TM.getRegisterInfo();
   Itins = TM.getInstrItineraryData();
 
+  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
   if (Subtarget->isTargetDarwin()) {
     // Uses VFP for Thumb libfuncs if available.
     if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
@@ -419,6 +422,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setLibcallName(RTLIB::MEMSET, "__aeabi_memset");
   }
 
+  // Use divmod compiler-rt calls for iOS 5.0 and later.
+  if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
+      !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
+    setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
+    setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
+  }
+
   if (Subtarget->isThumb1Only())
     addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
   else
@@ -453,7 +463,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
     setOperationAction(ISD::FREM, MVT::v2f64, Expand);
     setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
-    setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
+    setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
     setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
     setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
@@ -485,8 +495,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
     setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
     setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
-    setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
-    setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
+    setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
+    setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
 
     // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
     // a destination type that is wider than the source.
     setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
@@ -551,6 +561,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::SRL, MVT::i64, Custom);
   setOperationAction(ISD::SRA, MVT::i64, Custom);
 
+  if (!Subtarget->isThumb1Only()) {
+    // FIXME: We should do this for Thumb1 as well.
+    setOperationAction(ISD::ADDC, MVT::i32, Custom);
+    setOperationAction(ISD::ADDE, MVT::i32, Custom);
+    setOperationAction(ISD::SUBC, MVT::i32, Custom);
+    setOperationAction(ISD::SUBE, MVT::i32, Custom);
+  }
+
   // ARM does not have ROTL.
   setOperationAction(ISD::ROTL, MVT::i32, Expand);
   setOperationAction(ISD::CTTZ, MVT::i32, Custom);
@@ -596,62 +614,46 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
 
   // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
   // the default expansion.
+  // FIXME: This should be checking for v6k, not just v6.
   if (Subtarget->hasDataBarrier() ||
       (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
     // membarrier needs custom lowering; the rest are legal and handled
     // normally.
     setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+    // Custom lowering for 64-bit ops
+    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_OR,  MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
+    // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
+    setInsertFencesForAtomic(true);
   } else {
     // Set them all for expansion, which will force libcalls.
     setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
-    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8,  Expand);
-    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand);
+    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
     setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_SWAP, MVT::i8,  Expand);
-    setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8,  Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8,  Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8,  Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8,  Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8,  Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8,  Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8,  Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8,  Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8,  Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8,  Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+    // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
+    // Unordered/Monotonic case.
+    setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
+    setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
     // Since the libcalls include locking, fold in the fences
     setShouldFoldAtomicFences(true);
   }
 
-  // 64-bit versions are always libcalls (for now)
-  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand);
-  setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
 
   setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
@@ -839,6 +841,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
   case ARMISD::RRX:           return "ARMISD::RRX";
 
+  case ARMISD::ADDC:          return "ARMISD::ADDC";
+  case ARMISD::ADDE:          return "ARMISD::ADDE";
+  case ARMISD::SUBC:          return "ARMISD::SUBC";
+  case ARMISD::SUBE:          return "ARMISD::SUBE";
+
   case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
   case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";
@@ -935,6 +942,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   }
 }
 
+EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
+  if (!VT.isVector()) return getPointerTy();
+  return VT.changeVectorElementTypeToInteger();
+}
+
 /// getRegClassFor - Return the register class that should be used for the
 /// specified value type.
 TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
@@ -1210,8 +1222,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   MachineFunction &MF = DAG.getMachineFunction();
   bool IsStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
   bool IsSibCall = false;
-  // Temporarily disable tail calls so things don't break.
-  if (!EnableARMTailCalls)
+  // Disable tail calls if they're not supported.
+  if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
     isTailCall = false;
   if (isTailCall) {
     // Check if it's really possible to do a tail call.
@@ -1336,10 +1348,12 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
         SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
         SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
                                            MVT::i32);
+        // TODO: Disable AlwaysInline when it becomes possible
+        // to emit a nested call sequence.
         MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
                                             Flags.getByValAlign(),
                                             /*isVolatile=*/false,
-                                            /*AlwaysInline=*/false,
+                                            /*AlwaysInline=*/true,
                                             MachinePointerInfo(0),
                                             MachinePointerInfo(0)));
@@ -1404,9 +1418,9 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       const GlobalValue *GV = G->getGlobal();
       // Create a constant pool entry for the callee address
       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
-      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
-                                                           ARMPCLabelIndex,
-                                                           ARMCP::CPValue, 0);
+      ARMConstantPoolValue *CPV =
+        ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
+
       // Get the address of the callee into a register
       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
@@ -1419,8 +1433,9 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       // Create a constant pool entry for the callee address
       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
-      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
-                                                           Sym, ARMPCLabelIndex, 0);
+      ARMConstantPoolValue *CPV =
+        ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
+                                      ARMPCLabelIndex, 0);
       // Get the address of the callee into a register
       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
@@ -1441,9 +1456,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     // tBX takes a register source operand.
     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
-      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
-                                                           ARMPCLabelIndex,
-                                                           ARMCP::CPValue, 4);
+      ARMConstantPoolValue *CPV =
+        ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4);
       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
       Callee = DAG.getLoad(getPointerTy(), dl,
@@ -1470,8 +1484,9 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     const char *Sym = S->getSymbol();
     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
-      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
-                                                           Sym, ARMPCLabelIndex, 4);
+      ARMConstantPoolValue *CPV =
+        ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
+                                      ARMPCLabelIndex, 4);
       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
       Callee = DAG.getLoad(getPointerTy(), dl,
@@ -1940,9 +1955,9 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
   } else {
     unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
     ARMPCLabelIndex = AFI->createPICLabelUId();
-    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
-                                                         ARMCP::CPBlockAddress,
-                                                         PCAdj);
+    ARMConstantPoolValue *CPV =
+      ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
+                                      ARMCP::CPBlockAddress, PCAdj);
     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   }
   CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
@@ -1966,8 +1981,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   ARMConstantPoolValue *CPV =
-    new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
-                             ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
+    ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
+                                    ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
   SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
   Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
@@ -1982,11 +1997,11 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
   ArgListTy Args;
   ArgListEntry Entry;
   Entry.Node = Argument;
-  Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
+  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
   Args.push_back(Entry);
   // FIXME: is there useful debug info available here?
   std::pair<SDValue, SDValue> CallResult =
-    LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
+    LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()),
                 false, false, false, false,
                 0, CallingConv::C, false, /*isReturnValueUsed=*/true,
                 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
@@ -2013,8 +2028,9 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
     // Initial exec model.
     unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
     ARMConstantPoolValue *CPV =
-      new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
-                               ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true);
+      ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
+                                      ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
+                                      true);
     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
@@ -2030,7 +2046,8 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
                          false, false, 0);
   } else {
     // local exec model
-    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::TPOFF);
+    ARMConstantPoolValue *CPV =
+      ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
@@ -2066,7 +2083,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
   if (RelocM == Reloc::PIC_) {
     bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
     ARMConstantPoolValue *CPV =
-      new ARMConstantPoolValue(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
+      ARMConstantPoolConstant::Create(GV,
+                                      UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
     SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
     SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
@@ -2135,7 +2153,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
     ARMPCLabelIndex = AFI->createPICLabelUId();
     unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
     ARMConstantPoolValue *CPV =
-      new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
+      ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue,
+                                      PCAdj);
     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   }
   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
@@ -2167,9 +2186,9 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
   EVT PtrVT = getPointerTy();
   DebugLoc dl = Op.getDebugLoc();
   unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
-  ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
-                                                       "_GLOBAL_OFFSET_TABLE_",
-                                                       ARMPCLabelIndex, PCAdj);
+  ARMConstantPoolValue *CPV =
+    ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
+                                  ARMPCLabelIndex, PCAdj);
   SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
@@ -2191,7 +2210,8 @@ SDValue ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
   SDValue Val = DAG.getConstant(0, MVT::i32);
-  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
+  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
+                     DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
                      Op.getOperand(1), Val);
 }
 
@@ -2224,8 +2244,8 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
     unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
     ARMConstantPoolValue *CPV =
-      new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
-                               ARMCP::CPLSDA, PCAdj);
+      ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
+                                      ARMCP::CPLSDA, PCAdj);
     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
     SDValue Result =
@@ -2277,6 +2297,25 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
                      DAG.getConstant(DMBOpt, MVT::i32));
 }
 
+
+static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
+                                 const ARMSubtarget *Subtarget) {
+  // FIXME: handle "fence singlethread" more efficiently.
+  DebugLoc dl = Op.getDebugLoc();
+  if (!Subtarget->hasDataBarrier()) {
+    // Some ARMv6 cpus can support data barriers with an mcr instruction.
+    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
+    // here.
+    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
+           "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
+                       DAG.getConstant(0, MVT::i32));
+  }
+
+  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+                     DAG.getConstant(ARM_MB::ISH, MVT::i32));
+}
+
 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
                              const ARMSubtarget *Subtarget) {
   // ARM pre v5TE and Thumb1 does not have preload instructions.
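The LowerATOMIC_FENCE hunk above picks between a real data barrier and the ARMv6 CP15 fallback. A minimal, non-authoritative sketch of the observable effect follows; the function name, register rN, and exact expansion shape are illustrative assumptions, not output taken from this commit:

    // Illustration only -- not part of the diff.
    #include <atomic>

    void publish(int *data, std::atomic<int> *flag) {
      *data = 42;
      std::atomic_thread_fence(std::memory_order_release); // becomes ISD::ATOMIC_FENCE
      flag->store(1, std::memory_order_relaxed);
    }
    // Expected lowering per the code above:
    //   cores with DMB:    dmb ish                       (ARMISD::MEMBARRIER)
    //   ARMv6, ARM mode:   mcr p15, #0, rN, c7, c10, #5  (ARMISD::MEMBARRIER_MCR)
    //   Thumb1 / pre-v6:   ATOMIC_FENCE is marked Expand, forcing a libcall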
@@ -2754,7 +2793,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
     SDValue ARMcc;
     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
-    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp);
+    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
   }
 
   ARMCC::CondCodes CondCode, CondCode2;
@@ -2993,8 +3032,8 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   EVT VT = Op.getValueType();
   DebugLoc dl = Op.getDebugLoc();
 
-  EVT OperandVT = Op.getOperand(0).getValueType();
-  assert(OperandVT == MVT::v4i16 && "Invalid type for custom lowering!");
+  assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
+         "Invalid type for custom lowering!");
   if (VT != MVT::v4f32)
     return DAG.UnrollVectorOp(Op.getNode());
 
@@ -3905,8 +3944,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
       }
 
       // Try an immediate VMVN.
-      uint64_t NegatedImm = (SplatBits.getZExtValue() ^
-                             ((1LL << SplatBitSize) - 1));
+      uint64_t NegatedImm = (~SplatBits).getZExtValue();
       Val = isNEONModifiedImm(NegatedImm, SplatUndef.getZExtValue(),
                               SplatBitSize, DAG, VmovVT, VT.is128BitVector(),
@@ -4019,6 +4057,14 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
       // A shuffle can only come from building a vector from various
       // elements of other vectors.
       return SDValue();
+    } else if (V.getOperand(0).getValueType().getVectorElementType() !=
+               VT.getVectorElementType()) {
+      // This code doesn't know how to handle shuffles where the vector
+      // element types do not match (this happens because type legalization
+      // promotes the return type of EXTRACT_VECTOR_ELT).
+      // FIXME: It might be appropriate to extend this code to handle
+      // mismatched types.
+      return SDValue();
     }
 
     // Record this extraction against the appropriate vector if possible...
@@ -4819,6 +4865,71 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
   return N0;
 }
 
+static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
+  EVT VT = Op.getNode()->getValueType(0);
+  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+  unsigned Opc;
+  bool ExtraOp = false;
+  switch (Op.getOpcode()) {
+  default: assert(0 && "Invalid code");
+  case ISD::ADDC: Opc = ARMISD::ADDC; break;
+  case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
+  case ISD::SUBC: Opc = ARMISD::SUBC; break;
+  case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
+  }
+
+  if (!ExtraOp)
+    return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+                       Op.getOperand(1));
+  return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+                     Op.getOperand(1), Op.getOperand(2));
+}
+
+static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
+  // Monotonic load/store is legal for all targets
+  if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
+    return Op;
+
+  // Acquire/Release load/store is not legal for targets without a
+  // dmb or equivalent available.
+  return SDValue();
+}
+
+
+static void
+ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
+                    SelectionDAG &DAG, unsigned NewOp) {
+  DebugLoc dl = Node->getDebugLoc();
+  assert (Node->getValueType(0) == MVT::i64 &&
+          "Only know how to expand i64 atomics");
+
+  SmallVector<SDValue, 6> Ops;
+  Ops.push_back(Node->getOperand(0)); // Chain
+  Ops.push_back(Node->getOperand(1)); // Ptr
+  // Low part of Val1
+  Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                            Node->getOperand(2), DAG.getIntPtrConstant(0)));
+  // High part of Val1
+  Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                            Node->getOperand(2), DAG.getIntPtrConstant(1)));
+  if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) {
+    // Low part of Val2
+    Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                              Node->getOperand(3), DAG.getIntPtrConstant(0)));
+    // High part of Val2
+    Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                              Node->getOperand(3), DAG.getIntPtrConstant(1)));
+  }
+  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+  SDValue Result =
+    DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64,
+                            cast<MemSDNode>(Node)->getMemOperand());
+  SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
+  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+  Results.push_back(Result.getValue(2));
+}
+
 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Don't know how to custom lower this!");
@@ -4834,6 +4945,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
   case ISD::VASTART:       return LowerVASTART(Op, DAG);
   case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
+  case ISD::ATOMIC_FENCE:  return LowerATOMIC_FENCE(Op, DAG, Subtarget);
   case ISD::PREFETCH:      return LowerPREFETCH(Op, DAG, Subtarget);
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
@@ -4856,7 +4968,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SRL_PARTS:
   case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
   case ISD::CTTZ:          return LowerCTTZ(Op.getNode(), DAG, Subtarget);
-  case ISD::VSETCC:        return LowerVSETCC(Op, DAG);
+  case ISD::SETCC:         return LowerVSETCC(Op, DAG);
   case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
   case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
@@ -4865,6 +4977,12 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::MUL:           return LowerMUL(Op, DAG);
   case ISD::SDIV:          return LowerSDIV(Op, DAG);
   case ISD::UDIV:          return LowerUDIV(Op, DAG);
+  case ISD::ADDC:
+  case ISD::ADDE:
+  case ISD::SUBC:
+  case ISD::SUBE:          return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+  case ISD::ATOMIC_LOAD:
+  case ISD::ATOMIC_STORE:  return LowerAtomicLoadStore(Op, DAG);
   }
   return SDValue();
 }
@@ -4886,6 +5004,30 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
   case ISD::SRA:
     Res = Expand64BitShift(N, DAG, Subtarget);
     break;
+  case ISD::ATOMIC_LOAD_ADD:
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
+    return;
+  case ISD::ATOMIC_LOAD_AND:
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG);
+    return;
+  case ISD::ATOMIC_LOAD_NAND:
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG);
+    return;
+  case ISD::ATOMIC_LOAD_OR:
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG);
+    return;
+  case ISD::ATOMIC_LOAD_SUB:
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG);
+    return;
+  case ISD::ATOMIC_LOAD_XOR:
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG);
+    return;
+  case ISD::ATOMIC_SWAP:
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG);
+    return;
+  case ISD::ATOMIC_CMP_SWAP:
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
+    return;
   }
   if (Res.getNode())
     Results.push_back(Res);
@@ -4963,7 +5105,10 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
   //   cmp dest, oldval
   //   bne exitMBB
   BB = loop1MBB;
-  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+  MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+  if (ldrOpc == ARM::t2LDREX)
+    MIB.addImm(0);
+  AddDefaultPred(MIB);
   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
                  .addReg(dest).addReg(oldval));
   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
@@ -4976,8 +5121,10 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
   //   cmp scratch, #0
   //   bne loop1MBB
   BB = loop2MBB;
-  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
-                 .addReg(ptr));
+  MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr);
+  if (strOpc == ARM::t2STREX)
+    MIB.addImm(0);
+  AddDefaultPred(MIB);
   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
                  .addReg(scratch).addImm(0));
   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
@@ -5063,7 +5210,10 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   //   bne- loopMBB
   //   fallthrough --> exitMBB
   BB = loopMBB;
-  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+  MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+  if (ldrOpc == ARM::t2LDREX)
+    MIB.addImm(0);
+  AddDefaultPred(MIB);
   if (BinOpcode) {
     // operand order needs to go the other way for NAND
     if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
@@ -5074,8 +5224,10 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
                      addReg(dest).addReg(incr)).addReg(0);
   }
 
-  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
-                 .addReg(ptr));
+  MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
+  if (strOpc == ARM::t2STREX)
+    MIB.addImm(0);
+  AddDefaultPred(MIB);
   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
                  .addReg(scratch).addImm(0));
   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
@@ -5125,12 +5277,12 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
   case 1:
     ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
     strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
-    extendOpc = isThumb2 ? ARM::t2SXTBr : ARM::SXTBr;
+    extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
    break;
   case 2:
    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
-    extendOpc = isThumb2 ? ARM::t2SXTHr : ARM::SXTHr;
+    extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
    break;
   case 4:
    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
@@ -5170,12 +5322,17 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
   //   bne- loopMBB
   //   fallthrough --> exitMBB
   BB = loopMBB;
-  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+  MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+  if (ldrOpc == ARM::t2LDREX)
+    MIB.addImm(0);
+  AddDefaultPred(MIB);
 
   // Sign extend the value, if necessary.
   if (signExtend && extendOpc) {
     oldval = MRI.createVirtualRegister(ARM::GPRRegisterClass);
-    AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval).addReg(dest));
+    AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
+                     .addReg(dest)
+                     .addImm(0));
   }
 
   // Build compare and cmov instructions.
@@ -5184,8 +5341,10 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
     .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR);
 
-  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
-                 .addReg(ptr));
+  MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
+  if (strOpc == ARM::t2STREX)
+    MIB.addImm(0);
+  AddDefaultPred(MIB);
   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
                  .addReg(scratch).addImm(0));
   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
@@ -5203,79 +5362,596 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
   return BB;
 }
 
-static
-MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
-  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
-       E = MBB->succ_end(); I != E; ++I)
-    if (*I != Succ)
-      return *I;
-  llvm_unreachable("Expecting a BB with two successors!");
-}
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
+                                      unsigned Op1, unsigned Op2,
+                                      bool NeedsCarry, bool IsCmpxchg) const {
+  // This also handles ATOMIC_SWAP, indicated by Op1==0.
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
 
-// FIXME: This opcode table should obviously be expressed in the target
-// description. We probably just need a "machine opcode" value in the pseudo
-// instruction. But the ideal solution maybe to simply remove the "S" version
-// of the opcode altogether.
-struct AddSubFlagsOpcodePair {
-  unsigned PseudoOpc;
-  unsigned MachineOpc;
-};
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction *MF = BB->getParent();
+  MachineFunction::iterator It = BB;
+  ++It;
 
-static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
-  {ARM::ADCSri, ARM::ADCri},
-  {ARM::ADCSrr, ARM::ADCrr},
-  {ARM::ADCSrs, ARM::ADCrs},
-  {ARM::SBCSri, ARM::SBCri},
-  {ARM::SBCSrr, ARM::SBCrr},
-  {ARM::SBCSrs, ARM::SBCrs},
-  {ARM::RSBSri, ARM::RSBri},
-  {ARM::RSBSrr, ARM::RSBrr},
-  {ARM::RSBSrs, ARM::RSBrs},
-  {ARM::RSCSri, ARM::RSCri},
-  {ARM::RSCSrs, ARM::RSCrs},
-  {ARM::t2ADCSri, ARM::t2ADCri},
-  {ARM::t2ADCSrr, ARM::t2ADCrr},
-  {ARM::t2ADCSrs, ARM::t2ADCrs},
-  {ARM::t2SBCSri, ARM::t2SBCri},
-  {ARM::t2SBCSrr, ARM::t2SBCrr},
-  {ARM::t2SBCSrs, ARM::t2SBCrs},
-  {ARM::t2RSBSri, ARM::t2RSBri},
-  {ARM::t2RSBSrs, ARM::t2RSBrs},
-};
+  unsigned destlo = MI->getOperand(0).getReg();
+  unsigned desthi = MI->getOperand(1).getReg();
+  unsigned ptr = MI->getOperand(2).getReg();
+  unsigned vallo = MI->getOperand(3).getReg();
+  unsigned valhi = MI->getOperand(4).getReg();
+  DebugLoc dl = MI->getDebugLoc();
+  bool isThumb2 = Subtarget->isThumb2();
 
-// Convert and Add or Subtract with Carry and Flags to a generic opcode with
-// CPSR<def> operand. e.g. ADCS (...) -> ADC (... CPSR<def>).
-//
-// FIXME: Somewhere we should assert that CPSR<def> is in the correct
-// position to be recognized by the target descrition as the 'S' bit.
-bool ARMTargetLowering::RemapAddSubWithFlags(MachineInstr *MI,
-                                             MachineBasicBlock *BB) const {
-  unsigned OldOpc = MI->getOpcode();
-  unsigned NewOpc = 0;
+  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+  if (isThumb2) {
+    MRI.constrainRegClass(destlo, ARM::rGPRRegisterClass);
+    MRI.constrainRegClass(desthi, ARM::rGPRRegisterClass);
+    MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass);
+  }
 
-  // This is only called for instructions that need remapping, so iterating over
-  // the tiny opcode table is not costly.
-  static const int NPairs =
-    sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair);
-  for (AddSubFlagsOpcodePair *Pair = &AddSubFlagsOpcodeMap[0],
-       *End = &AddSubFlagsOpcodeMap[NPairs]; Pair != End; ++Pair) {
-    if (OldOpc == Pair->PseudoOpc) {
-      NewOpc = Pair->MachineOpc;
-      break;
+  unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD;
+  unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD;
+
+  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *contBB = 0, *cont2BB = 0;
+  if (IsCmpxchg) {
+    contBB = MF->CreateMachineBasicBlock(LLVM_BB);
+    cont2BB = MF->CreateMachineBasicBlock(LLVM_BB);
+  }
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MF->insert(It, loopMBB);
+  if (IsCmpxchg) {
+    MF->insert(It, contBB);
+    MF->insert(It, cont2BB);
+  }
+  MF->insert(It, exitMBB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), BB,
+                  llvm::next(MachineBasicBlock::iterator(MI)),
+                  BB->end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+  TargetRegisterClass *TRC =
+    isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+  unsigned storesuccess = MRI.createVirtualRegister(TRC);
+
+  // thisMBB:
+  // ...
+  //   fallthrough --> loopMBB
+  BB->addSuccessor(loopMBB);
+
+  // loopMBB:
+  //   ldrexd r2, r3, ptr
+  //   <binopa> r0, r2, incr
+  //   <binopb> r1, r3, incr
+  //   strexd storesuccess, r0, r1, ptr
+  //   cmp storesuccess, #0
+  //   bne- loopMBB
+  //   fallthrough --> exitMBB
+  //
+  // Note that the registers are explicitly specified because there is not any
+  // way to force the register allocator to allocate a register pair.
+  //
+  // FIXME: The hardcoded registers are not necessary for Thumb2, but we
+  // need to properly enforce the restriction that the two output registers
+  // for ldrexd must be different.
+  BB = loopMBB;
+  // Load
+  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
+                 .addReg(ARM::R2, RegState::Define)
+                 .addReg(ARM::R3, RegState::Define).addReg(ptr));
+  // Copy r2/r3 into dest.  (This copy will normally be coalesced.)
+  BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2);
+  BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3);
+
+  if (IsCmpxchg) {
+    // Add early exit
+    for (unsigned i = 0; i < 2; i++) {
+      AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr :
+                                                         ARM::CMPrr))
+                     .addReg(i == 0 ? destlo : desthi)
+                     .addReg(i == 0 ? vallo : valhi));
+      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+        .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+      BB->addSuccessor(exitMBB);
+      BB->addSuccessor(i == 0 ? contBB : cont2BB);
+      BB = (i == 0 ? contBB : cont2BB);
+    }
+
+    // Copy to physregs for strexd
+    unsigned setlo = MI->getOperand(5).getReg();
+    unsigned sethi = MI->getOperand(6).getReg();
+    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo);
+    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi);
+  } else if (Op1) {
+    // Perform binary operation
+    AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0)
+                   .addReg(destlo).addReg(vallo))
+      .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry));
+    AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1)
+                   .addReg(desthi).addReg(valhi)).addReg(0);
+  } else {
+    // Copy to physregs for strexd
+    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo);
+    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi);
+  }
 
-  if (!NewOpc)
-    return false;
+  // Store
+  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
+                 .addReg(ARM::R0).addReg(ARM::R1).addReg(ptr));
+  // Cmp+jump
+  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+                 .addReg(storesuccess).addImm(0));
+  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+  BB->addSuccessor(loopMBB);
+  BB->addSuccessor(exitMBB);
+
+  // exitMBB:
+  // ...
+  BB = exitMBB;
+
+  MI->eraseFromParent();   // The instruction is gone now.
+
+  return BB;
+}
+
+/// EmitBasePointerRecalculation - For functions using a base pointer, we
+/// rematerialize it (via the frame pointer).
+void ARMTargetLowering::
+EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
+                             MachineBasicBlock *DispatchBB) const {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
+  MachineFunction &MF = *MI->getParent()->getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+
+  if (!RI.hasBasePointer(MF)) return;
+
+  MachineBasicBlock::iterator MBBI = MI;
+
+  int32_t NumBytes = AFI->getFramePtrSpillOffset();
+  unsigned FramePtr = RI.getFrameRegister(MF);
+  assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
+         "Base pointer without frame pointer?");
+
+  if (AFI->isThumb2Function())
+    llvm::emitT2RegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
+                                 FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
+  else if (AFI->isThumbFunction())
+    llvm::emitThumbRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
+                                    FramePtr, -NumBytes, *AII, RI);
+  else
+    llvm::emitARMRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
+                                  FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
+
+  if (!RI.needsStackRealignment(MF)) return;
+
+  // If there's dynamic realignment, adjust for it.
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  unsigned MaxAlign = MFI->getMaxAlignment();
+  assert(!AFI->isThumb1OnlyFunction());
+
+  // Emit bic r6, r6, MaxAlign
+  unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri;
+  AddDefaultCC(
+    AddDefaultPred(
+      BuildMI(*MBB, MBBI, MI->getDebugLoc(), TII->get(bicOpc), ARM::R6)
+      .addReg(ARM::R6, RegState::Kill)
+      .addImm(MaxAlign - 1)));
+}
+
+/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
+/// registers the function context.
+void ARMTargetLowering::
+SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
+                       MachineBasicBlock *DispatchBB, int FI) const {
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   DebugLoc dl = MI->getDebugLoc();
-  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
-  for (unsigned i = 0; i < MI->getNumOperands(); ++i)
-    MIB.addOperand(MI->getOperand(i));
-  AddDefaultPred(MIB);
-  MIB.addReg(ARM::CPSR, RegState::Define); // S bit
+  MachineFunction *MF = MBB->getParent();
+  MachineRegisterInfo *MRI = &MF->getRegInfo();
+  MachineConstantPool *MCP = MF->getConstantPool();
+  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
+  const Function *F = MF->getFunction();
+
+  bool isThumb = Subtarget->isThumb();
+  bool isThumb2 = Subtarget->isThumb2();
+
+  unsigned PCLabelId = AFI->createPICLabelUId();
+  unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
+  ARMConstantPoolValue *CPV =
+    ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
+  unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
+
+  const TargetRegisterClass *TRC =
+    isThumb ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+
+  // Grab constant pool and fixed stack memory operands.
+  MachineMemOperand *CPMMO =
+    MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(),
+                             MachineMemOperand::MOLoad, 4, 4);
+
+  MachineMemOperand *FIMMOSt =
+    MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+                             MachineMemOperand::MOStore, 4, 4);
+
+  EmitBasePointerRecalculation(MI, MBB, DispatchBB);
+
+  // Load the address of the dispatch MBB into the jump buffer.
+  if (isThumb2) {
+    // Incoming value: jbuf
+    //   ldr.n  r5, LCPI1_1
+    //   orr    r5, r5, #1
+    //   add    r5, pc
+    //   str    r5, [$jbuf, #+4] ; &jbuf[1]
+    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
+                   .addConstantPoolIndex(CPI)
+                   .addMemOperand(CPMMO));
+    // Set the low bit because of thumb mode.
+    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+    AddDefaultCC(
+      AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
+                     .addReg(NewVReg1, RegState::Kill)
+                     .addImm(0x01)));
+    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+    BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
+      .addReg(NewVReg2, RegState::Kill)
+      .addImm(PCLabelId);
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
+                   .addReg(NewVReg3, RegState::Kill)
+                   .addFrameIndex(FI)
+                   .addImm(36)  // &jbuf[1] :: pc
+                   .addMemOperand(FIMMOSt));
+  } else if (isThumb) {
+    // Incoming value: jbuf
+    //   ldr.n  r1, LCPI1_4
+    //   add    r1, pc
+    //   mov    r2, #1
+    //   orrs   r1, r2
+    //   add    r2, $jbuf, #+4 ; &jbuf[1]
+    //   str    r1, [r2]
+    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
+                   .addConstantPoolIndex(CPI)
+                   .addMemOperand(CPMMO));
+    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+    BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
+      .addReg(NewVReg1, RegState::Kill)
+      .addImm(PCLabelId);
+    // Set the low bit because of thumb mode.
+    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
+                   .addReg(ARM::CPSR, RegState::Define)
+                   .addImm(1));
+    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
+                   .addReg(ARM::CPSR, RegState::Define)
+                   .addReg(NewVReg2, RegState::Kill)
+                   .addReg(NewVReg3, RegState::Kill));
+    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5)
+                   .addFrameIndex(FI)
+                   .addImm(36)); // &jbuf[1] :: pc
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
+                   .addReg(NewVReg4, RegState::Kill)
+                   .addReg(NewVReg5, RegState::Kill)
+                   .addImm(0)
+                   .addMemOperand(FIMMOSt));
+  } else {
+    // Incoming value: jbuf
+    //   ldr  r1, LCPI1_1
+    //   add  r1, pc, r1
+    //   str  r1, [$jbuf, #+4] ; &jbuf[1]
+    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
+                   .addConstantPoolIndex(CPI)
+                   .addImm(0)
+                   .addMemOperand(CPMMO));
+    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
+                   .addReg(NewVReg1, RegState::Kill)
+                   .addImm(PCLabelId));
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
+                   .addReg(NewVReg2, RegState::Kill)
+                   .addFrameIndex(FI)
+                   .addImm(36)  // &jbuf[1] :: pc
+                   .addMemOperand(FIMMOSt));
+  }
+}
+
+MachineBasicBlock *ARMTargetLowering::
+EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+  MachineFunction *MF = MBB->getParent();
+  MachineRegisterInfo *MRI = &MF->getRegInfo();
+  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
+  MachineFrameInfo *MFI = MF->getFrameInfo();
+  int FI = MFI->getFunctionContextIndex();
+
+  const TargetRegisterClass *TRC =
+    Subtarget->isThumb() ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+
+  // Get a mapping of the call site numbers to all of the landing pads they're
+  // associated with.
+  DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
+  unsigned MaxCSNum = 0;
+  MachineModuleInfo &MMI = MF->getMMI();
+  for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) {
+    if (!BB->isLandingPad()) continue;
+
+    // FIXME: We should assert that the EH_LABEL is the first MI in the landing
+    // pad.
+    for (MachineBasicBlock::iterator
+           II = BB->begin(), IE = BB->end(); II != IE; ++II) {
+      if (!II->isEHLabel()) continue;
+
+      MCSymbol *Sym = II->getOperand(0).getMCSymbol();
+      if (!MMI.hasCallSiteLandingPad(Sym)) continue;
+
+      SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);
+      for (SmallVectorImpl<unsigned>::iterator
+             CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
+           CSI != CSE; ++CSI) {
+        CallSiteNumToLPad[*CSI].push_back(BB);
+        MaxCSNum = std::max(MaxCSNum, *CSI);
+      }
+      break;
+    }
+  }
+
+  // Get an ordered list of the machine basic blocks for the jump table.
+  std::vector<MachineBasicBlock*> LPadList;
+  SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
+  LPadList.reserve(CallSiteNumToLPad.size());
+  for (unsigned I = 1; I <= MaxCSNum; ++I) {
+    SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
+    for (SmallVectorImpl<MachineBasicBlock*>::iterator
+           II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
+      LPadList.push_back(*II);
+      InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
+    }
+  }
+
+  assert(!LPadList.empty() &&
+         "No landing pad destinations for the dispatch jump table!");
+
+  // Create the jump table and associated information.
+  MachineJumpTableInfo *JTI =
+    MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
+  unsigned MJTI = JTI->createJumpTableIndex(LPadList);
+  unsigned UId = AFI->createJumpTableUId();
+
+  // Create the MBBs for the dispatch code.
+
+  // Shove the dispatch's address into the return slot in the function context.
+  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
+  DispatchBB->setIsLandingPad();
+
+  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+  BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));
+  DispatchBB->addSuccessor(TrapBB);
+
+  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
+  DispatchBB->addSuccessor(DispContBB);
+
+  // Insert and renumber MBBs.
+  MachineBasicBlock *Last = &MF->back();
+  MF->insert(MF->end(), DispatchBB);
+  MF->insert(MF->end(), DispContBB);
+  MF->insert(MF->end(), TrapBB);
+  MF->RenumberBlocks(Last);
+
+  // Insert code into the entry block that creates and registers the function
+  // context.
+  SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
+
+  MachineMemOperand *FIMMOLd =
+    MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+                             MachineMemOperand::MOLoad |
+                             MachineMemOperand::MOVolatile, 4, 4);
+
+  if (Subtarget->isThumb2()) {
+    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
+                   .addFrameIndex(FI)
+                   .addImm(4)
+                   .addMemOperand(FIMMOLd));
+    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
+                   .addReg(NewVReg1)
+                   .addImm(LPadList.size()));
+    BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
+      .addMBB(TrapBB)
+      .addImm(ARMCC::HI)
+      .addReg(ARM::CPSR);
+
+    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg2)
+                   .addJumpTableIndex(MJTI)
+                   .addImm(UId));
+
+    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+    AddDefaultCC(
+      AddDefaultPred(
+        BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg3)
+        .addReg(NewVReg2, RegState::Kill)
+        .addReg(NewVReg1)
+        .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+
+    BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
+      .addReg(NewVReg3, RegState::Kill)
+      .addReg(NewVReg1)
+      .addJumpTableIndex(MJTI)
+      .addImm(UId);
+  } else if (Subtarget->isThumb()) {
+    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
+                   .addFrameIndex(FI)
+                   .addImm(1)
+                   .addMemOperand(FIMMOLd));
+
+    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
+                   .addReg(NewVReg1)
+                   .addImm(LPadList.size()));
+    BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
+      .addMBB(TrapBB)
+      .addImm(ARMCC::HI)
+      .addReg(ARM::CPSR);
+
+    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
+                   .addReg(ARM::CPSR, RegState::Define)
+                   .addReg(NewVReg1)
+                   .addImm(2));
+
+    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
+                   .addJumpTableIndex(MJTI)
+                   .addImm(UId));
+
+    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
+                   .addReg(ARM::CPSR, RegState::Define)
+                   .addReg(NewVReg2, RegState::Kill)
+                   .addReg(NewVReg3));
+
+    MachineMemOperand *JTMMOLd =
+      MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
+                               MachineMemOperand::MOLoad, 4, 4);
+
+    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
+                   .addReg(NewVReg4, RegState::Kill)
+                   .addImm(0)
+                   .addMemOperand(JTMMOLd));
+
+    unsigned NewVReg6 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
+                   .addReg(ARM::CPSR, RegState::Define)
+                   .addReg(NewVReg5, RegState::Kill)
+                   .addReg(NewVReg3));
+
+    BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
+      .addReg(NewVReg6, RegState::Kill)
+      .addJumpTableIndex(MJTI)
+      .addImm(UId);
+  } else {
+    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
+                   .addFrameIndex(FI)
+                   .addImm(4)
+                   .addMemOperand(FIMMOLd));
+    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
+                   .addReg(NewVReg1)
+                   .addImm(LPadList.size()));
+    BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
+      .addMBB(TrapBB)
+      .addImm(ARMCC::HI)
+      .addReg(ARM::CPSR);
+
+    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+    AddDefaultCC(
+      AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg2)
+                     .addReg(NewVReg1)
+                     .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg3)
+                   .addJumpTableIndex(MJTI)
+                   .addImm(UId));
+
+    MachineMemOperand *JTMMOLd =
+      MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
+                               MachineMemOperand::MOLoad, 4, 4);
+    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(
+      BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg4)
+      .addReg(NewVReg2, RegState::Kill)
+      .addReg(NewVReg3)
+      .addImm(0)
+      .addMemOperand(JTMMOLd));
+
+    BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
+      .addReg(NewVReg4, RegState::Kill)
+      .addReg(NewVReg3)
+      .addJumpTableIndex(MJTI)
+      .addImm(UId);
+  }
+
+  // Add the jump table entries as successors to the MBB.
+  MachineBasicBlock *PrevMBB = 0;
+  for (std::vector<MachineBasicBlock*>::iterator
+         I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
+    MachineBasicBlock *CurMBB = *I;
+    if (PrevMBB != CurMBB)
+      DispContBB->addSuccessor(CurMBB);
+    PrevMBB = CurMBB;
+  }
+
+  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
+  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+  const unsigned *SavedRegs = RI.getCalleeSavedRegs(MF);
+  for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
+         I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) {
+    MachineBasicBlock *BB = *I;
+
+    // Remove the landing pad successor from the invoke block and replace it
+    // with the new dispatch block.
+    for (MachineBasicBlock::succ_iterator
+           SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) {
+      MachineBasicBlock *SMBB = *SI;
+      if (SMBB->isLandingPad()) {
+        BB->removeSuccessor(SMBB);
+        SMBB->setIsLandingPad(false);
+      }
+    }
+
+    BB->addSuccessor(DispatchBB);
+
+    // Find the invoke call and mark all of the callee-saved registers as
+    // 'implicit defined' so that they're spilled. This prevents code from
+    // moving instructions to before the EH block, where they will never be
+    // executed.
+    for (MachineBasicBlock::reverse_iterator
+           II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
+      if (!II->getDesc().isCall()) continue;
+
+      DenseMap<unsigned, bool> DefRegs;
+      for (MachineInstr::mop_iterator
+             OI = II->operands_begin(), OE = II->operands_end();
+           OI != OE; ++OI) {
+        if (!OI->isReg()) continue;
+        DefRegs[OI->getReg()] = true;
+      }
+
+      MachineInstrBuilder MIB(&*II);
+
+      for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
+        if (!TRC->contains(SavedRegs[i])) continue;
+        if (!DefRegs[SavedRegs[i]])
+          MIB.addReg(SavedRegs[i], RegState::ImplicitDefine | RegState::Dead);
+      }
+
+      break;
+    }
+  }
+
+  // The instruction is gone now.
   MI->eraseFromParent();
-  return true;
+
+  return MBB;
+}
+
+static
+MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
+  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+       E = MBB->succ_end(); I != E; ++I)
+    if (*I != Succ)
+      return *I;
+  llvm_unreachable("Expecting a BB with two successors!");
+}
 
 MachineBasicBlock *
@@ -5286,12 +5962,61 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   bool isThumb2 = Subtarget->isThumb2();
   switch (MI->getOpcode()) {
   default: {
-    if (RemapAddSubWithFlags(MI, BB))
-      return BB;
-
     MI->dump();
     llvm_unreachable("Unexpected instr type to insert");
   }
+  // The Thumb2 pre-indexed stores have the same MI operands, they just
+  // define them differently in the .td files from the isel patterns, so
+  // they need pseudos.
+ case ARM::t2STR_preidx: + MI->setDesc(TII->get(ARM::t2STR_PRE)); + return BB; + case ARM::t2STRB_preidx: + MI->setDesc(TII->get(ARM::t2STRB_PRE)); + return BB; + case ARM::t2STRH_preidx: + MI->setDesc(TII->get(ARM::t2STRH_PRE)); + return BB; + + case ARM::STRi_preidx: + case ARM::STRBi_preidx: { + unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ? + ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM; + // Decode the offset. + unsigned Offset = MI->getOperand(4).getImm(); + bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub; + Offset = ARM_AM::getAM2Offset(Offset); + if (isSub) + Offset = -Offset; + + MachineMemOperand *MMO = *MI->memoperands_begin(); + BuildMI(*BB, MI, dl, TII->get(NewOpc)) + .addOperand(MI->getOperand(0)) // Rn_wb + .addOperand(MI->getOperand(1)) // Rt + .addOperand(MI->getOperand(2)) // Rn + .addImm(Offset) // offset (skip GPR==zero_reg) + .addOperand(MI->getOperand(5)) // pred + .addOperand(MI->getOperand(6)) + .addMemOperand(MMO); + MI->eraseFromParent(); + return BB; + } + case ARM::STRr_preidx: + case ARM::STRBr_preidx: + case ARM::STRH_preidx: { + unsigned NewOpc; + switch (MI->getOpcode()) { + default: llvm_unreachable("unexpected opcode!"); + case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break; + case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break; + case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break; + } + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); + for (unsigned i = 0; i < MI->getNumOperands(); ++i) + MIB.addOperand(MI->getOperand(i)); + MI->eraseFromParent(); + return BB; + } case ARM::ATOMIC_LOAD_ADD_I8: return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); case ARM::ATOMIC_LOAD_ADD_I16: @@ -5370,6 +6095,31 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); + + case ARM::ATOMADD6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr, + isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, + /*NeedsCarry*/ true); + case ARM::ATOMSUB6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, + isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, + /*NeedsCarry*/ true); + case ARM::ATOMOR6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, + isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); + case ARM::ATOMXOR6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr, + isThumb2 ? ARM::t2EORrr : ARM::EORrr); + case ARM::ATOMAND6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, + isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); + case ARM::ATOMSWAP6432: + return EmitAtomicBinary64(MI, BB, 0, 0, false); + case ARM::ATOMCMPXCHG6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, + isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, + /*NeedsCarry*/ false, /*IsCmpxchg*/true); + case ARM::tMOVCCr_pseudo: { // To "insert" a SELECT_CC instruction, we actually have to insert the // diamond control-flow pattern. The incoming instruction knows the @@ -5461,13 +6211,159 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); - BuildMI(BB, dl, TII->get(isThumb2 ? 
-      .addMBB(exitMBB);
+    if (isThumb2)
+      AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB));
+    else
+      BuildMI(BB, dl, TII->get(ARM::B)).addMBB(exitMBB);
 
     MI->eraseFromParent();   // The pseudo instruction is gone now.
     return BB;
   }
+
+  case ARM::ABS:
+  case ARM::t2ABS: {
+    // To insert an ABS instruction, we have to insert the
+    // diamond control-flow pattern.  The incoming instruction knows the
+    // source vreg to test against 0, the destination vreg to set,
+    // the condition code register to branch on, the
+    // true/false values to select between, and a branch opcode to use.
+    // It transforms
+    //     V1 = ABS V0
+    // into
+    //     V2 = MOVS V0
+    //     BCC                      (branch to SinkBB if V0 >= 0)
+    //     RSBBB: V3 = RSBri V2, 0  (compute ABS if V2 < 0)
+    //     SinkBB: V1 = PHI(V2, V3)
+    const BasicBlock *LLVM_BB = BB->getBasicBlock();
+    MachineFunction::iterator BBI = BB;
+    ++BBI;
+    MachineFunction *Fn = BB->getParent();
+    MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
+    MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
+    Fn->insert(BBI, RSBBB);
+    Fn->insert(BBI, SinkBB);
+
+    unsigned int ABSSrcReg = MI->getOperand(1).getReg();
+    unsigned int ABSDstReg = MI->getOperand(0).getReg();
+    bool isThumb2 = Subtarget->isThumb2();
+    MachineRegisterInfo &MRI = Fn->getRegInfo();
+    // In Thumb mode S must not be specified if source register is the SP or
+    // PC and if destination register is the SP, so restrict register class
+    unsigned NewMovDstReg = MRI.createVirtualRegister(
+      isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
+    unsigned NewRsbDstReg = MRI.createVirtualRegister(
+      isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
+
+    // Transfer the remainder of BB and its successor edges to sinkMBB.
+    SinkBB->splice(SinkBB->begin(), BB,
+      llvm::next(MachineBasicBlock::iterator(MI)),
+      BB->end());
+    SinkBB->transferSuccessorsAndUpdatePHIs(BB);
+
+    BB->addSuccessor(RSBBB);
+    BB->addSuccessor(SinkBB);
+
+    // fall through to SinkMBB
+    RSBBB->addSuccessor(SinkBB);
+
+    // insert a movs at the end of BB
+    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr),
+      NewMovDstReg)
+      .addReg(ABSSrcReg, RegState::Kill)
+      .addImm((unsigned)ARMCC::AL).addReg(0)
+      .addReg(ARM::CPSR, RegState::Define);
+
+    // insert a bcc with opposite CC to ARMCC::MI at the end of BB
+    BuildMI(BB, dl,
+      TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
+      .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
+
+    // insert rsbri in RSBBB
+    // Note: BCC and rsbri will be converted into predicated rsbmi
+    // by if-conversion pass
+    BuildMI(*RSBBB, RSBBB->begin(), dl,
+      TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
+      .addReg(NewMovDstReg, RegState::Kill)
+      .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+
+    // insert PHI in SinkBB,
+    // reuse ABSDstReg to not change uses of ABS instruction
+    BuildMI(*SinkBB, SinkBB->begin(), dl,
+      TII->get(ARM::PHI), ABSDstReg)
+      .addReg(NewRsbDstReg).addMBB(RSBBB)
+      .addReg(NewMovDstReg).addMBB(BB);
+
+    // remove ABS instruction
+    MI->eraseFromParent();
+
+    // return last added BB
+    return SinkBB;
+  }
+  }
+}
+
+void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+                                                      SDNode *Node) const {
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (!MCID.hasPostISelHook()) {
+    assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
+           "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
+    return;
+  }
+
+  // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
+  // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
+  // operand is still set to noreg. If needed, set the optional operand's
+  // register to CPSR, and remove the redundant implicit def.
+  //
+  // e.g. ADCS (...opt:%noreg, CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
+
+  // Rename pseudo opcodes.
+  unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
+  if (NewOpc) {
+    const ARMBaseInstrInfo *TII =
+      static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo());
+    MI->setDesc(TII->get(NewOpc));
+  }
+  unsigned ccOutIdx = MCID.getNumOperands() - 1;
+
+  // Any ARM instruction that sets the 's' bit should specify an optional
+  // "cc_out" operand in the last operand position.
+  if (!MCID.hasOptionalDef() || !MCID.OpInfo[ccOutIdx].isOptionalDef()) {
+    assert(!NewOpc && "Optional cc_out operand required");
+    return;
+  }
+  // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
+  // since we already have an optional CPSR def.
+  bool definesCPSR = false;
+  bool deadCPSR = false;
+  for (unsigned i = MCID.getNumOperands(), e = MI->getNumOperands();
+       i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
+      definesCPSR = true;
+      if (MO.isDead())
+        deadCPSR = true;
+      MI->RemoveOperand(i);
+      break;
+    }
+  }
+  if (!definesCPSR) {
+    assert(!NewOpc && "Optional cc_out operand required");
+    return;
+  }
+  assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
+  if (deadCPSR) {
+    assert(!MI->getOperand(ccOutIdx).getReg() &&
+           "expect uninitialized optional cc_out operand");
+    return;
   }
+
+  // If this instruction was defined with an optional CPSR def and its dag node
+  // had a live implicit CPSR def, then activate the optional CPSR def.
+  MachineOperand &MO = MI->getOperand(ccOutIdx);
+  MO.setReg(ARM::CPSR);
+  MO.setIsDef(true);
 }
 
 //===----------------------------------------------------------------------===//
@@ -6975,7 +7871,8 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
   SDValue FalseVal = N->getOperand(0);
   SDValue TrueVal = N->getOperand(1);
   SDValue ARMcc = N->getOperand(2);
-  ARMCC::CondCodes CC = (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+  ARMCC::CondCodes CC =
+    (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
 
   // Simplify
   //   mov     r1, r0
@@ -6995,7 +7892,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
   //   movne   r0, y
   /// FIXME: Turn this into a target neutral optimization?
   SDValue Res;
-  if (CC == ARMCC::NE && FalseVal == RHS) {
+  if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
     Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
                       N->getOperand(3), Cmp);
   } else if (CC == ARMCC::EQ && TrueVal == RHS) {
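A note on the PerformCMOVCombine hunk above: with an NE condition on (LHS == RHS), the false arm is only selected when LHS == RHS, and the combine fires only when FalseVal == RHS, so substituting LHS for FalseVal cannot change the result; the new FalseVal != LHS guard plausibly just keeps the combine from producing the very node it started from and re-firing forever. A brute-force check of the equivalence, where the hypothetical cmov_ne models the ARMISD::CMOV-with-NE semantics:

#include <cstdio>

static int cmov_ne(int lhs, int rhs, int t, int f) {
  return lhs != rhs ? t : f;   // the CMOV being combined
}

int main() {
  // Exhaust a small range; f == rhs is the combine's precondition.
  for (int lhs = -2; lhs <= 2; ++lhs)
    for (int rhs = -2; rhs <= 2; ++rhs)
      for (int t = -2; t <= 2; ++t)
        if (cmov_ne(lhs, rhs, t, rhs) != cmov_ne(lhs, rhs, t, lhs)) {
          std::puts("mismatch");   // never reached
          return 1;
        }
  std::puts("equivalent");
  return 0;
}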
@@ -7235,7 +8132,7 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
 
 /// isLegalAddressingMode - Return true if the addressing mode represented
 /// by AM is legal for this target, for a load/store of the specified type.
 bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
-                                              const Type *Ty) const {
+                                              Type *Ty) const {
   EVT VT = getValueType(Ty, true);
   if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
     return false;
@@ -7351,7 +8248,8 @@ static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
 
   if (Ptr->getOpcode() == ISD::ADD) {
     isInc = true;
-    ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
+    ARM_AM::ShiftOpc ShOpcVal=
+      ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
     if (ShOpcVal != ARM_AM::no_shift) {
       Base = Ptr->getOperand(1);
       Offset = Ptr->getOperand(0);
@@ -7536,7 +8434,7 @@ bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
     if (AsmPieces.size() == 3 &&
         AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
         IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
-      const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+      IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
       if (Ty && Ty->getBitWidth() == 32)
         return IntrinsicLowering::LowerToByteSwap(CI);
     }
@@ -7559,6 +8457,9 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
     case 'x': return C_RegisterClass;
     case 't': return C_RegisterClass;
     case 'j': return C_Other; // Constant for movw.
+      // An address with a single base register. Due to the way we
+      // currently handle addresses it is the same as an 'r' memory constraint.
+    case 'Q': return C_Memory;
     }
   } else if (Constraint.size() == 2) {
     switch (Constraint[0]) {
@@ -7582,7 +8483,7 @@ ARMTargetLowering::getSingleConstraintMatchWeight(
   // but allow it at the lowest weight.
   if (CallOperandVal == NULL)
     return CW_Default;
-  const Type *type = CallOperandVal->getType();
+  Type *type = CallOperandVal->getType();
   // Look at the constraint type.
   switch (*constraint) {
   default:
@@ -7618,7 +8519,7 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
     return RCPair(0U, ARM::GPRRegisterClass);
   case 'h': // High regs or no regs.
     if (Subtarget->isThumb())
-       return RCPair(0U, ARM::hGPRRegisterClass);
+      return RCPair(0U, ARM::hGPRRegisterClass);
     break;
   case 'r':
     return RCPair(0U, ARM::GPRRegisterClass);
@@ -7632,15 +8533,15 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
     break;
   case 'x':
     if (VT == MVT::f32)
-       return RCPair(0U, ARM::SPR_8RegisterClass);
+      return RCPair(0U, ARM::SPR_8RegisterClass);
     if (VT.getSizeInBits() == 64)
-       return RCPair(0U, ARM::DPR_8RegisterClass);
+      return RCPair(0U, ARM::DPR_8RegisterClass);
    if (VT.getSizeInBits() == 128)
-       return RCPair(0U, ARM::QPR_8RegisterClass);
+      return RCPair(0U, ARM::QPR_8RegisterClass);
     break;
   case 't':
     if (VT == MVT::f32)
-       return RCPair(0U, ARM::SPRRegisterClass);
+      return RCPair(0U, ARM::SPRRegisterClass);
     break;
   }
 }
@@ -7680,12 +8581,12 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
 
   switch (ConstraintLetter) {
     case 'j':
-     // Constant suitable for movw, must be between 0 and
-     // 65535.
-     if (Subtarget->hasV6T2Ops())
-       if (CVal >= 0 && CVal <= 65535)
-         break;
-     return;
+      // Constant suitable for movw, must be between 0 and
+      // 65535.
+      if (Subtarget->hasV6T2Ops())
+        if (CVal >= 0 && CVal <= 65535)
+          break;
+      return;
     case 'I':
       if (Subtarget->isThumb1Only()) {
         // This must be a constant between 0 and 255, for ADD
@@ -7823,50 +8724,6 @@ ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
   return false;
 }
 
-int ARM::getVFPf32Imm(const APFloat &FPImm) {
-  APInt Imm = FPImm.bitcastToAPInt();
-  uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
-  int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127;  // -126 to 127
-  int64_t Mantissa = Imm.getZExtValue() & 0x7fffff;  // 23 bits
-
-  // We can handle 4 bits of mantissa.
-  // mantissa = (16+UInt(e:f:g:h))/16.
-  if (Mantissa & 0x7ffff)
-    return -1;
-  Mantissa >>= 19;
-  if ((Mantissa & 0xf) != Mantissa)
-    return -1;
-
-  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
-  if (Exp < -3 || Exp > 4)
-    return -1;
-  Exp = ((Exp+3) & 0x7) ^ 4;
-
-  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
-}
-
-int ARM::getVFPf64Imm(const APFloat &FPImm) {
-  APInt Imm = FPImm.bitcastToAPInt();
-  uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
-  int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023;  // -1022 to 1023
-  uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;
-
-  // We can handle 4 bits of mantissa.
-  // mantissa = (16+UInt(e:f:g:h))/16.
-  if (Mantissa & 0xffffffffffffLL)
-    return -1;
-  Mantissa >>= 48;
-  if ((Mantissa & 0xf) != Mantissa)
-    return -1;
-
-  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
-  if (Exp < -3 || Exp > 4)
-    return -1;
-  Exp = ((Exp+3) & 0x7) ^ 4;
-
-  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
-}
-
 bool ARM::isBitFieldInvertedMask(unsigned v) {
   if (v == 0xffffffff)
     return 0;
@@ -7889,9 +8746,9 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
   if (!Subtarget->hasVFP3())
     return false;
   if (VT == MVT::f32)
-    return ARM::getVFPf32Imm(Imm) != -1;
+    return ARM_AM::getFP32Imm(Imm) != -1;
   if (VT == MVT::f64)
-    return ARM::getVFPf64Imm(Imm) != -1;
+    return ARM_AM::getFP64Imm(Imm) != -1;
   return false;
 }
 
@@ -7933,7 +8790,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     // Conservatively set memVT to the entire set of vectors stored.
     unsigned NumElts = 0;
     for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
-      const Type *ArgTy = I.getArgOperand(ArgI)->getType();
+      Type *ArgTy = I.getArgOperand(ArgI)->getType();
      if (!ArgTy->isVectorTy())
        break;
      NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
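The two VFP-immediate functions deleted above are not gone: per the isFPImmLegal hunk, the same encodings now live in ARM_AM::getFP32Imm/getFP64Imm (note the MCTargetDesc/ARMAddressingModes.h include swapped in at the top of this file). A standalone version of the f32 case, operating on a raw IEEE-754 single with the same rules; getFP32Imm here is a local stand-in, not the ARM_AM entry point:

#include <cstdint>
#include <cstdio>
#include <cstring>

int getFP32Imm(float F) {
  uint32_t Imm;
  std::memcpy(&Imm, &F, sizeof(Imm));              // bitcast, as APInt did
  uint32_t Sign     = (Imm >> 31) & 1;
  int32_t  Exp      = (int32_t)((Imm >> 23) & 0xff) - 127;
  uint32_t Mantissa = Imm & 0x7fffff;              // 23 bits

  // Only the top 4 mantissa bits are encodable: mantissa = (16+UInt(e:f:g:h))/16.
  if (Mantissa & 0x7ffff)
    return -1;
  Mantissa >>= 19;

  // Three exponent bits: exp == UInt(NOT(b):c:d)-3, i.e. the range -3..4.
  if (Exp < -3 || Exp > 4)
    return -1;
  Exp = ((Exp + 3) & 0x7) ^ 4;

  return (int)((Sign << 7) | ((uint32_t)Exp << 4) | Mantissa);
}

int main() {
  std::printf("%d %d %d\n",
              getFP32Imm(1.0f),    // 112: vmov.f32-encodable
              getFP32Imm(0.5f),    //  96: vmov.f32-encodable
              getFP32Imm(0.1f));   //  -1: not encodable
}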
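Looking back at the ATOM*6432 cases in EmitInstrWithCustomInserter: the 64-bit atomic add and sub expansions pair a low-word op that produces a carry with a high-word op that consumes it (ADDS/ADC, or t2ADDrr/t2ADCrr, selected by the NeedsCarry flag). A standalone check of the decomposition those opcode pairs rely on; add64_via_32 is a hypothetical name:

#include <cassert>
#include <cstdint>

uint64_t add64_via_32(uint32_t alo, uint32_t ahi, uint32_t blo, uint32_t bhi) {
  uint32_t lo    = alo + blo;          // ADDS: low words, carry out
  uint32_t carry = lo < alo;           // the C flag ADDS would set
  uint32_t hi    = ahi + bhi + carry;  // ADC: high words plus carry in
  return ((uint64_t)hi << 32) | lo;
}

int main() {
  // Carry propagation across the word boundary.
  assert(add64_via_32(0xffffffffu, 0u, 1u, 0u) == 0x100000000ull);
  // Matches plain 64-bit addition.
  assert(add64_via_32(0xdeadbeefu, 0x12345678u, 0x21524111u, 1u) ==
         0xdeadbeefu + (0x12345678ull << 32) + 0x21524111u + (1ull << 32));
  return 0;
}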