Diffstat (limited to 'contrib/llvm/lib/Target/AArch64')
13 files changed, 430 insertions, 212 deletions
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index dc41f2f..daa7f1d 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -367,9 +367,8 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // shoving a base register and an offset into the instruction then we may well // need to scavenge registers. We should either specifically add an // callee-save register for this purpose or allocate an extra spill slot. - bool BigStack = - (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)) + MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF) || MFI->hasVarSizedObjects() // Access will be from X29: messes things up || (MFI->adjustsStack() && !hasReservedCallFrame(MF)); @@ -392,6 +391,8 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (ExtraReg != 0) { MF.getRegInfo().setPhysRegUsed(ExtraReg); } else { + assert(RS && "Expect register scavenger to be available"); + // Create a stack slot for scavenging purposes. PrologEpilogInserter // helpfully places it near either SP or FP for us to avoid // infinitely-regression during scavenging. diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 46b8221..102c71b 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -70,6 +70,15 @@ public: return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); } + /// Used for pre-lowered address-reference nodes, so we already know + /// the fields match. This operand's job is simply to add an + /// appropriate shift operand (i.e. 0) to the MOVZ/MOVK instruction. + bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) { + Imm = N; + Shift = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + bool SelectFPZeroOperand(SDValue N, SDValue &Dummy); bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, @@ -88,6 +97,13 @@ public: bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth); + SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, + unsigned Op64); + + /// Put the given constant into a pool and return a DAG which will give its + /// address. 
+ SDValue getConstantPoolItemAddress(DebugLoc DL, const Constant *CV); + SDNode *TrySelectToMoveImm(SDNode *N); SDNode *LowerToFPLitPool(SDNode *Node); SDNode *SelectToLitPool(SDNode *N); @@ -224,12 +240,51 @@ SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) { return ResNode; } +SDValue +AArch64DAGToDAGISel::getConstantPoolItemAddress(DebugLoc DL, + const Constant *CV) { + EVT PtrVT = TLI.getPointerTy(); + + switch (TLI.getTargetMachine().getCodeModel()) { + case CodeModel::Small: { + unsigned Alignment = + TLI.getDataLayout()->getABITypeAlignment(CV->getType()); + return CurDAG->getNode( + AArch64ISD::WrapperSmall, DL, PtrVT, + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG), + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_LO12), + CurDAG->getConstant(Alignment, MVT::i32)); + } + case CodeModel::Large: { + SDNode *LitAddr; + LitAddr = CurDAG->getMachineNode( + AArch64::MOVZxii, DL, PtrVT, + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3), + CurDAG->getTargetConstant(0, MVT::i32)); + LitAddr = CurDAG->getMachineNode( + AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0), + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC), + CurDAG->getTargetConstant(0, MVT::i32)); + LitAddr = CurDAG->getMachineNode( + AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0), + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC), + CurDAG->getTargetConstant(0, MVT::i32)); + LitAddr = CurDAG->getMachineNode( + AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0), + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC), + CurDAG->getTargetConstant(0, MVT::i32)); + return SDValue(LitAddr, 0); + } + default: + llvm_unreachable("Only small and large code models supported now"); + } +} + SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) { DebugLoc DL = Node->getDebugLoc(); uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue(); int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue(); EVT DestType = Node->getValueType(0); - EVT PtrVT = TLI.getPointerTy(); // Since we may end up loading a 64-bit constant from a 32-bit entry the // constant in the pool may have a different type to the eventual node. 
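Aside on the CodeModel::Large path of getConstantPoolItemAddress above: the MOVZxii plus three MOVKxii nodes build the full 64-bit constant-pool address from the four 16-bit ABS_G3/ABS_G2_NC/ABS_G1_NC/ABS_G0_NC granules. A minimal C++ sketch of the equivalent arithmetic follows; the granule values g3..g0 are hypothetical stand-ins for what the relocations resolve to at link time, not LLVM API.

    #include <cstdint>

    // Sketch only: models the net effect of the MOVZ + 3x MOVK sequence used
    // for CodeModel::Large above. g3..g0 are hypothetical 16-bit granule
    // values filled in by the ABS_G* relocations.
    uint64_t materializeLargeAddress(uint16_t g3, uint16_t g2, uint16_t g1,
                                     uint16_t g0) {
      uint64_t Addr = uint64_t(g3) << 48; // MOVZ Xd, #:abs_g3:sym
      Addr |= uint64_t(g2) << 32;         // MOVK Xd, #:abs_g2_nc:sym
      Addr |= uint64_t(g1) << 16;         // MOVK Xd, #:abs_g1_nc:sym
      Addr |= uint64_t(g0);               // MOVK Xd, #:abs_g0_nc:sym
      return Addr;
    }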
@@ -256,14 +311,8 @@ SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) { Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(), MemType.getSizeInBits()), UnsignedVal); - SDValue PoolAddr; + SDValue PoolAddr = getConstantPoolItemAddress(DL, CV); unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(CV->getType()); - PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, - AArch64II::MO_NO_FLAG), - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, - AArch64II::MO_LO12), - CurDAG->getConstant(Alignment, MVT::i32)); return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(), PoolAddr, @@ -276,20 +325,10 @@ SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) { SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) { DebugLoc DL = Node->getDebugLoc(); const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue(); - EVT PtrVT = TLI.getPointerTy(); EVT DestType = Node->getValueType(0); unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(FV->getType()); - SDValue PoolAddr; - - assert(TM.getCodeModel() == CodeModel::Small && - "Only small code model supported"); - PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0, - AArch64II::MO_NO_FLAG), - CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0, - AArch64II::MO_LO12), - CurDAG->getConstant(Alignment, MVT::i32)); + SDValue PoolAddr = getConstantPoolItemAddress(DL, FV); return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr, MachinePointerInfo::getConstantPool(), @@ -318,6 +357,38 @@ AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos, return true; } +SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8, + unsigned Op16,unsigned Op32, + unsigned Op64) { + // Mostly direct translation to the given operations, except that we preserve + // the AtomicOrdering for use later on. 
+ AtomicSDNode *AN = cast<AtomicSDNode>(Node); + EVT VT = AN->getMemoryVT(); + + unsigned Op; + if (VT == MVT::i8) + Op = Op8; + else if (VT == MVT::i16) + Op = Op16; + else if (VT == MVT::i32) + Op = Op32; + else if (VT == MVT::i64) + Op = Op64; + else + llvm_unreachable("Unexpected atomic operation"); + + SmallVector<SDValue, 4> Ops; + for (unsigned i = 1; i < AN->getNumOperands(); ++i) + Ops.push_back(AN->getOperand(i)); + + Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32)); + Ops.push_back(AN->getOperand(0)); // Chain moves to the end + + return CurDAG->SelectNodeTo(Node, Op, + AN->getValueType(0), MVT::Other, + &Ops[0], Ops.size()); +} + SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { // Dump information about the Node being selected DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n"); @@ -328,6 +399,78 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { } switch (Node->getOpcode()) { + case ISD::ATOMIC_LOAD_ADD: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_ADD_I8, + AArch64::ATOMIC_LOAD_ADD_I16, + AArch64::ATOMIC_LOAD_ADD_I32, + AArch64::ATOMIC_LOAD_ADD_I64); + case ISD::ATOMIC_LOAD_SUB: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_SUB_I8, + AArch64::ATOMIC_LOAD_SUB_I16, + AArch64::ATOMIC_LOAD_SUB_I32, + AArch64::ATOMIC_LOAD_SUB_I64); + case ISD::ATOMIC_LOAD_AND: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_AND_I8, + AArch64::ATOMIC_LOAD_AND_I16, + AArch64::ATOMIC_LOAD_AND_I32, + AArch64::ATOMIC_LOAD_AND_I64); + case ISD::ATOMIC_LOAD_OR: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_OR_I8, + AArch64::ATOMIC_LOAD_OR_I16, + AArch64::ATOMIC_LOAD_OR_I32, + AArch64::ATOMIC_LOAD_OR_I64); + case ISD::ATOMIC_LOAD_XOR: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_XOR_I8, + AArch64::ATOMIC_LOAD_XOR_I16, + AArch64::ATOMIC_LOAD_XOR_I32, + AArch64::ATOMIC_LOAD_XOR_I64); + case ISD::ATOMIC_LOAD_NAND: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_NAND_I8, + AArch64::ATOMIC_LOAD_NAND_I16, + AArch64::ATOMIC_LOAD_NAND_I32, + AArch64::ATOMIC_LOAD_NAND_I64); + case ISD::ATOMIC_LOAD_MIN: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_MIN_I8, + AArch64::ATOMIC_LOAD_MIN_I16, + AArch64::ATOMIC_LOAD_MIN_I32, + AArch64::ATOMIC_LOAD_MIN_I64); + case ISD::ATOMIC_LOAD_MAX: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_MAX_I8, + AArch64::ATOMIC_LOAD_MAX_I16, + AArch64::ATOMIC_LOAD_MAX_I32, + AArch64::ATOMIC_LOAD_MAX_I64); + case ISD::ATOMIC_LOAD_UMIN: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_UMIN_I8, + AArch64::ATOMIC_LOAD_UMIN_I16, + AArch64::ATOMIC_LOAD_UMIN_I32, + AArch64::ATOMIC_LOAD_UMIN_I64); + case ISD::ATOMIC_LOAD_UMAX: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_UMAX_I8, + AArch64::ATOMIC_LOAD_UMAX_I16, + AArch64::ATOMIC_LOAD_UMAX_I32, + AArch64::ATOMIC_LOAD_UMAX_I64); + case ISD::ATOMIC_SWAP: + return SelectAtomic(Node, + AArch64::ATOMIC_SWAP_I8, + AArch64::ATOMIC_SWAP_I16, + AArch64::ATOMIC_SWAP_I32, + AArch64::ATOMIC_SWAP_I64); + case ISD::ATOMIC_CMP_SWAP: + return SelectAtomic(Node, + AArch64::ATOMIC_CMP_SWAP_I8, + AArch64::ATOMIC_CMP_SWAP_I16, + AArch64::ATOMIC_CMP_SWAP_I32, + AArch64::ATOMIC_CMP_SWAP_I64); case ISD::FrameIndex: { int FI = cast<FrameIndexSDNode>(Node)->getIndex(); EVT PtrTy = TLI.getPointerTy(); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e9f4497..56f6751 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -59,13 
+59,6 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) computeRegisterProperties(); - // Some atomic operations can be folded into load-acquire or store-release - // instructions on AArch64. It's marginally simpler to let LLVM expand - // everything out to a barrier and then recombine the (few) barriers we can. - setInsertFencesForAtomic(true); - setTargetDAGCombine(ISD::ATOMIC_FENCE); - setTargetDAGCombine(ISD::ATOMIC_STORE); - // We combine OR nodes for bitfield and NEON BSL operations. setTargetDAGCombine(ISD::OR); @@ -275,27 +268,34 @@ EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const { return VT.changeVectorElementTypeToInteger(); } -static void getExclusiveOperation(unsigned Size, unsigned &ldrOpc, - unsigned &strOpc) { - switch (Size) { - default: llvm_unreachable("unsupported size for atomic binary op!"); - case 1: - ldrOpc = AArch64::LDXR_byte; - strOpc = AArch64::STXR_byte; - break; - case 2: - ldrOpc = AArch64::LDXR_hword; - strOpc = AArch64::STXR_hword; - break; - case 4: - ldrOpc = AArch64::LDXR_word; - strOpc = AArch64::STXR_word; - break; - case 8: - ldrOpc = AArch64::LDXR_dword; - strOpc = AArch64::STXR_dword; - break; - } +static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, + unsigned &LdrOpc, + unsigned &StrOpc) { + static unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword, + AArch64::LDXR_word, AArch64::LDXR_dword}; + static unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword, + AArch64::LDAXR_word, AArch64::LDAXR_dword}; + static unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword, + AArch64::STXR_word, AArch64::STXR_dword}; + static unsigned StoreRels[] = {AArch64::STLXR_byte, AArch64::STLXR_hword, + AArch64::STLXR_word, AArch64::STLXR_dword}; + + unsigned *LoadOps, *StoreOps; + if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent) + LoadOps = LoadAcqs; + else + LoadOps = LoadBares; + + if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) + StoreOps = StoreRels; + else + StoreOps = StoreBares; + + assert(isPowerOf2_32(Size) && Size <= 8 && + "unsupported size for atomic binary op!"); + + LdrOpc = LoadOps[Log2_32(Size)]; + StrOpc = StoreOps[Log2_32(Size)]; } MachineBasicBlock * @@ -313,12 +313,13 @@ AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned dest = MI->getOperand(0).getReg(); unsigned ptr = MI->getOperand(1).getReg(); unsigned incr = MI->getOperand(2).getReg(); + AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); DebugLoc dl = MI->getDebugLoc(); MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, ldrOpc, strOpc); + getExclusiveOperation(Size, Ord, ldrOpc, strOpc); MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); @@ -397,6 +398,8 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI, unsigned dest = MI->getOperand(0).getReg(); unsigned ptr = MI->getOperand(1).getReg(); unsigned incr = MI->getOperand(2).getReg(); + AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); + unsigned oldval = dest; DebugLoc dl = MI->getDebugLoc(); @@ -411,7 +414,7 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI, } unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, ldrOpc, strOpc); + getExclusiveOperation(Size, Ord, ldrOpc, strOpc); MachineBasicBlock *loopMBB = 
MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); @@ -479,6 +482,7 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI, unsigned ptr = MI->getOperand(1).getReg(); unsigned oldval = MI->getOperand(2).getReg(); unsigned newval = MI->getOperand(3).getReg(); + AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm()); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); @@ -487,7 +491,7 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI, TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass; unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, ldrOpc, strOpc); + getExclusiveOperation(Size, Ord, ldrOpc, strOpc); MachineFunction *MF = BB->getParent(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -777,6 +781,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN"; case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL"; + case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge"; case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall"; default: return NULL; @@ -1662,17 +1667,26 @@ AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - assert(getTargetMachine().getCodeModel() == CodeModel::Small - && "Only small code model supported at the moment"); - - // The most efficient code is PC-relative anyway for the small memory model, - // so we don't need to worry about relocation model. - return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - DAG.getTargetBlockAddress(BA, PtrVT, 0, - AArch64II::MO_NO_FLAG), - DAG.getTargetBlockAddress(BA, PtrVT, 0, - AArch64II::MO_LO12), - DAG.getConstant(/*Alignment=*/ 4, MVT::i32)); + switch(getTargetMachine().getCodeModel()) { + case CodeModel::Small: + // The most efficient code is PC-relative anyway for the small memory model, + // so we don't need to worry about relocation model. + return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + DAG.getTargetBlockAddress(BA, PtrVT, 0, + AArch64II::MO_NO_FLAG), + DAG.getTargetBlockAddress(BA, PtrVT, 0, + AArch64II::MO_LO12), + DAG.getConstant(/*Alignment=*/ 4, MVT::i32)); + case CodeModel::Large: + return DAG.getNode( + AArch64ISD::WrapperLarge, DL, PtrVT, + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G3), + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G2_NC), + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G1_NC), + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G0_NC)); + default: + llvm_unreachable("Only small and large code models supported now"); + } } @@ -1841,12 +1855,33 @@ AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, } SDValue -AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, - SelectionDAG &DAG) const { - // TableGen doesn't have easy access to the CodeModel or RelocationModel, so - // we make that distinction here. 
+AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op, + SelectionDAG &DAG) const { + assert(getTargetMachine().getCodeModel() == CodeModel::Large); + assert(getTargetMachine().getRelocationModel() == Reloc::Static); + + EVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op); + const GlobalValue *GV = GN->getGlobal(); + + SDValue GlobalAddr = DAG.getNode( + AArch64ISD::WrapperLarge, dl, PtrVT, + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G3), + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G2_NC), + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G1_NC), + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G0_NC)); - // We support the small memory model for now. + if (GN->getOffset() != 0) + return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr, + DAG.getConstant(GN->getOffset(), PtrVT)); + + return GlobalAddr; +} + +SDValue +AArch64TargetLowering::LowerGlobalAddressELFSmall(SDValue Op, + SelectionDAG &DAG) const { assert(getTargetMachine().getCodeModel() == CodeModel::Small); EVT PtrVT = getPointerTy(); @@ -1925,6 +1960,22 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, return GlobalRef; } +SDValue +AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, + SelectionDAG &DAG) const { + // TableGen doesn't have easy access to the CodeModel or RelocationModel, so + // we make those distinctions here. + + switch (getTargetMachine().getCodeModel()) { + case CodeModel::Small: + return LowerGlobalAddressELFSmall(Op, DAG); + case CodeModel::Large: + return LowerGlobalAddressELFLarge(Op, DAG); + default: + llvm_unreachable("Only small and large code models supported now"); + } +} + SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL, @@ -1974,6 +2025,8 @@ AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetELF() && "TLS not implemented for non-ELF targets"); + assert(getTargetMachine().getCodeModel() == CodeModel::Small + && "TLS only supported in small memory model"); const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); @@ -2082,14 +2135,27 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); DebugLoc dl = JT->getDebugLoc(); + EVT PtrVT = getPointerTy(); // When compiling PIC, jump tables get put in the code section so a static // relocation-style is acceptable for both cases. 
- return DAG.getNode(AArch64ISD::WrapperSmall, dl, getPointerTy(), - DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()), - DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(), - AArch64II::MO_LO12), - DAG.getConstant(1, MVT::i32)); + switch (getTargetMachine().getCodeModel()) { + case CodeModel::Small: + return DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, + DAG.getTargetJumpTable(JT->getIndex(), PtrVT), + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, + AArch64II::MO_LO12), + DAG.getConstant(1, MVT::i32)); + case CodeModel::Large: + return DAG.getNode( + AArch64ISD::WrapperLarge, dl, PtrVT, + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G3), + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G2_NC), + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G1_NC), + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G0_NC)); + default: + llvm_unreachable("Only small and large code models supported now"); + } } // (SELECT_CC lhs, rhs, iftrue, iffalse, condcode) @@ -2377,78 +2443,6 @@ static SDValue PerformANDCombine(SDNode *N, DAG.getConstant(LSB + Width - 1, MVT::i64)); } -static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode, - TargetLowering::DAGCombinerInfo &DCI) { - // An atomic operation followed by an acquiring atomic fence can be reduced to - // an acquiring load. The atomic operation provides a convenient pointer to - // load from. If the original operation was a load anyway we can actually - // combine the two operations into an acquiring load. - SelectionDAG &DAG = DCI.DAG; - SDValue AtomicOp = FenceNode->getOperand(0); - AtomicSDNode *AtomicNode = dyn_cast<AtomicSDNode>(AtomicOp); - - // A fence on its own can't be optimised - if (!AtomicNode) - return SDValue(); - - AtomicOrdering FenceOrder - = static_cast<AtomicOrdering>(FenceNode->getConstantOperandVal(1)); - SynchronizationScope FenceScope - = static_cast<SynchronizationScope>(FenceNode->getConstantOperandVal(2)); - - if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope()) - return SDValue(); - - // If the original operation was an ATOMIC_LOAD then we'll be replacing it, so - // the chain we use should be its input, otherwise we'll put our store after - // it so we use its output chain. - SDValue Chain = AtomicNode->getOpcode() == ISD::ATOMIC_LOAD ? - AtomicNode->getChain() : AtomicOp; - - // We have an acquire fence with a handy atomic operation nearby, we can - // convert the fence into a load-acquire, discarding the result. - DebugLoc DL = FenceNode->getDebugLoc(); - SDValue Op = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, AtomicNode->getMemoryVT(), - AtomicNode->getValueType(0), - Chain, // Chain - AtomicOp.getOperand(1), // Pointer - AtomicNode->getMemOperand(), Acquire, - FenceScope); - - if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD) - DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode()); - - return Op.getValue(1); -} - -static SDValue PerformATOMIC_STORECombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - // A releasing atomic fence followed by an atomic store can be combined into a - // single store operation. 
- SelectionDAG &DAG = DCI.DAG; - AtomicSDNode *AtomicNode = cast<AtomicSDNode>(N); - SDValue FenceOp = AtomicNode->getOperand(0); - - if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE) - return SDValue(); - - AtomicOrdering FenceOrder - = static_cast<AtomicOrdering>(FenceOp->getConstantOperandVal(1)); - SynchronizationScope FenceScope - = static_cast<SynchronizationScope>(FenceOp->getConstantOperandVal(2)); - - if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope()) - return SDValue(); - - DebugLoc DL = AtomicNode->getDebugLoc(); - return DAG.getAtomic(ISD::ATOMIC_STORE, DL, AtomicNode->getMemoryVT(), - FenceOp.getOperand(0), // Chain - AtomicNode->getOperand(1), // Pointer - AtomicNode->getOperand(2), // Value - AtomicNode->getMemOperand(), Release, - FenceScope); -} - /// For a true bitfield insert, the bits getting into that contiguous mask /// should come from the low part of an existing value: they must be formed from /// a compatible SHL operation (unless they're already low). This function @@ -2804,8 +2798,6 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N, switch (N->getOpcode()) { default: break; case ISD::AND: return PerformANDCombine(N, DCI); - case ISD::ATOMIC_FENCE: return PerformATOMIC_FENCECombine(N, DCI); - case ISD::ATOMIC_STORE: return PerformATOMIC_STORECombine(N, DCI); case ISD::OR: return PerformORCombine(N, DCI, Subtarget); case ISD::SRA: return PerformSRACombine(N, DCI); } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 4960d28..d49b3ee 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -103,7 +103,12 @@ namespace AArch64ISD { UBFX, // Wraps an address which the ISelLowering phase has decided should be - // created using the small absolute memory model: i.e. adrp/add or + // created using the large memory model style: i.e. a sequence of four + // movz/movk instructions. + WrapperLarge, + + // Wraps an address which the ISelLowering phase has decided should be + // created using the small memory model style: i.e. adrp/add or // adrp/mem-op. This exists to prevent bare TargetAddresses which may never // get selected. WrapperSmall @@ -206,7 +211,11 @@ public: SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const; + + SDValue LowerGlobalAddressELFSmall(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td index cb93471..9dd122f 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // This file describes AArch64 instruction formats, down to the level of the // instruction's overall class. 
-// ===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 37be5e4..d2cfc7d 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -70,12 +70,20 @@ def A64cmn : PatFrag<(ops node:$lhs, node:$rhs), // made for a variable/address at ISelLowering. // + The output of ISelLowering should be selectable (hence the Wrapper, // rather than a bare target opcode) -def SDTAArch64Wrapper : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, - SDTCisSameAs<1, 2>, - SDTCisVT<3, i32>, - SDTCisPtrTy<0>]>; +def SDTAArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisSameAs<0, 4>, + SDTCisPtrTy<0>]>; -def A64WrapperSmall : SDNode<"AArch64ISD::WrapperSmall", SDTAArch64Wrapper>; +def A64WrapperLarge :SDNode<"AArch64ISD::WrapperLarge", SDTAArch64WrapperLarge>; + +def SDTAArch64WrapperSmall : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, i32>, + SDTCisPtrTy<0>]>; + +def A64WrapperSmall :SDNode<"AArch64ISD::WrapperSmall", SDTAArch64WrapperSmall>; def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; @@ -159,49 +167,55 @@ let Defs = [XSP], Uses = [XSP] in { // Atomic operation pseudo-instructions //===----------------------------------------------------------------------===// -let usesCustomInserter = 1 in { -multiclass AtomicSizes<string opname> { - def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), - [(set i32:$dst, (!cast<SDNode>(opname # "_8") i64:$ptr, i32:$incr))]>; - def _I16 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), - [(set i32:$dst, (!cast<SDNode>(opname # "_16") i64:$ptr, i32:$incr))]>; - def _I32 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), - [(set i32:$dst, (!cast<SDNode>(opname # "_32") i64:$ptr, i32:$incr))]>; - def _I64 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$incr), - [(set i64:$dst, (!cast<SDNode>(opname # "_64") i64:$ptr, i64:$incr))]>; -} -} - -defm ATOMIC_LOAD_ADD : AtomicSizes<"atomic_load_add">; -defm ATOMIC_LOAD_SUB : AtomicSizes<"atomic_load_sub">; -defm ATOMIC_LOAD_AND : AtomicSizes<"atomic_load_and">; -defm ATOMIC_LOAD_OR : AtomicSizes<"atomic_load_or">; -defm ATOMIC_LOAD_XOR : AtomicSizes<"atomic_load_xor">; -defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">; -defm ATOMIC_SWAP : AtomicSizes<"atomic_swap">; +// These get selected from C++ code as a pretty much direct translation from the +// generic DAG nodes. The one exception is the AtomicOrdering is added as an +// operand so that the eventual lowering can make use of it and choose +// acquire/release operations when required. 
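The "choose acquire/release operations when required" step mentioned in that comment is what getExclusiveOperation in AArch64ISelLowering.cpp now does with the extra ordering operand. A condensed C++ restatement of its ordering checks, for illustration only (the helper names and the local enum are stand-ins, not the LLVM definitions):

    // Simplified stand-in for llvm::AtomicOrdering; numeric values differ in LLVM.
    enum AtomicOrdering { NotAtomic, Unordered, Monotonic, Acquire, Release,
                          AcquireRelease, SequentiallyConsistent };

    // The exclusive load becomes LDAXR when the operation has acquire
    // semantics, and the exclusive store becomes STLXR when it has release
    // semantics; sequentially-consistent operations get both.
    static bool needsAcquireLoad(AtomicOrdering Ord) {
      return Ord == Acquire || Ord == AcquireRelease ||
             Ord == SequentiallyConsistent;
    }
    static bool needsReleaseStore(AtomicOrdering Ord) {
      return Ord == Release || Ord == AcquireRelease ||
             Ord == SequentiallyConsistent;
    }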
+ +let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in { +multiclass AtomicSizes { + def _I8 : PseudoInst<(outs GPR32:$dst), + (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>; + def _I16 : PseudoInst<(outs GPR32:$dst), + (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>; + def _I32 : PseudoInst<(outs GPR32:$dst), + (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>; + def _I64 : PseudoInst<(outs GPR64:$dst), + (ins GPR64xsp:$ptr, GPR64:$incr, i32imm:$ordering), []>; +} +} + +defm ATOMIC_LOAD_ADD : AtomicSizes; +defm ATOMIC_LOAD_SUB : AtomicSizes; +defm ATOMIC_LOAD_AND : AtomicSizes; +defm ATOMIC_LOAD_OR : AtomicSizes; +defm ATOMIC_LOAD_XOR : AtomicSizes; +defm ATOMIC_LOAD_NAND : AtomicSizes; +defm ATOMIC_SWAP : AtomicSizes; let Defs = [NZCV] in { // These operations need a CMP to calculate the correct value - defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">; - defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">; - defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">; - defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">; -} - -let usesCustomInserter = 1, Defs = [NZCV] in { -def ATOMIC_CMP_SWAP_I8 - : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), - [(set i32:$dst, (atomic_cmp_swap_8 i64:$ptr, i32:$old, i32:$new))]>; -def ATOMIC_CMP_SWAP_I16 - : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), - [(set i32:$dst, (atomic_cmp_swap_16 i64:$ptr, i32:$old, i32:$new))]>; -def ATOMIC_CMP_SWAP_I32 - : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), - [(set i32:$dst, (atomic_cmp_swap_32 i64:$ptr, i32:$old, i32:$new))]>; -def ATOMIC_CMP_SWAP_I64 - : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$old, GPR64:$new), - [(set i64:$dst, (atomic_cmp_swap_64 i64:$ptr, i64:$old, i64:$new))]>; + defm ATOMIC_LOAD_MIN : AtomicSizes; + defm ATOMIC_LOAD_MAX : AtomicSizes; + defm ATOMIC_LOAD_UMIN : AtomicSizes; + defm ATOMIC_LOAD_UMAX : AtomicSizes; +} + +class AtomicCmpSwap<RegisterClass GPRData> + : PseudoInst<(outs GPRData:$dst), + (ins GPR64xsp:$ptr, GPRData:$old, GPRData:$new, + i32imm:$ordering), []> { + let usesCustomInserter = 1; + let hasCtrlDep = 1; + let mayLoad = 1; + let mayStore = 1; + let Defs = [NZCV]; } +def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<GPR32>; +def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>; +def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>; +def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>; + //===----------------------------------------------------------------------===// // Add-subtract (extended register) instructions //===----------------------------------------------------------------------===// @@ -2579,7 +2593,8 @@ defm LDAR : A64I_LRex<"ldar", 0b101>; class acquiring_load<PatFrag base> : PatFrag<(ops node:$ptr), (base node:$ptr), [{ - return cast<AtomicSDNode>(N)->getOrdering() == Acquire; + AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); + return Ordering == Acquire || Ordering == SequentiallyConsistent; }]>; def atomic_load_acquire_8 : acquiring_load<atomic_load_8>; @@ -2610,7 +2625,8 @@ class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs, class releasing_store<PatFrag base> : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ - return cast<AtomicSDNode>(N)->getOrdering() == Release; + AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); + return Ordering == Release || Ordering == SequentiallyConsistent; }]>; def atomic_store_release_8 : releasing_store<atomic_store_8>; @@ -3863,7 +3879,7 @@ 
multiclass movw_operands<string prefix, string instname, int width> { let DiagnosticType = "MOVWUImm16"; } - def _imm : Operand<i32> { + def _imm : Operand<i64> { let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_imm_asmoperand"); let PrintMethod = "printMoveWideImmOperand"; let EncoderMethod = "getMoveWideImmOpValue"; @@ -3934,7 +3950,7 @@ multiclass movalias_operand<string prefix, string basename, # "A64Imms::" # immpredicate # ">"; } - def _movimm : Operand<i32> { + def _movimm : Operand<i64> { let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand"); let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift); @@ -3958,6 +3974,15 @@ def : movalias<MOVZxii, GPR64, movz64_movimm>; def : movalias<MOVNwii, GPR32, movn32_movimm>; def : movalias<MOVNxii, GPR64, movn64_movimm>; +def movw_addressref : ComplexPattern<i64, 2, "SelectMOVWAddressRef">; + +def : Pat<(A64WrapperLarge movw_addressref:$G3, movw_addressref:$G2, + movw_addressref:$G1, movw_addressref:$G0), + (MOVKxii (MOVKxii (MOVKxii (MOVZxii movw_addressref:$G3), + movw_addressref:$G2), + movw_addressref:$G1), + movw_addressref:$G0)>; + //===----------------------------------------------------------------------===// // PC-relative addressing instructions //===----------------------------------------------------------------------===// @@ -4454,8 +4479,6 @@ def : ADRP_ADD<A64WrapperSmall, tjumptable>; // GOT access patterns //===----------------------------------------------------------------------===// -// FIXME: Wibble - class GOTLoadSmall<SDNode addrfrag> : Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)), (LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp index c96bf85..3d22330 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -68,6 +68,18 @@ AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO, case AArch64II::MO_TPREL_G0_NC: Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext); break; + case AArch64II::MO_ABS_G3: + Expr = AArch64MCExpr::CreateABS_G3(Expr, OutContext); + break; + case AArch64II::MO_ABS_G2_NC: + Expr = AArch64MCExpr::CreateABS_G2_NC(Expr, OutContext); + break; + case AArch64II::MO_ABS_G1_NC: + Expr = AArch64MCExpr::CreateABS_G1_NC(Expr, OutContext); + break; + case AArch64II::MO_ABS_G0_NC: + Expr = AArch64MCExpr::CreateABS_G0_NC(Expr, OutContext); + break; case AArch64II::MO_NO_FLAG: // Expr is already correct break; diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index b83577a..3b811df 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -63,14 +63,15 @@ public: ~AArch64ELFStreamer() {} - virtual void ChangeSection(const MCSection *Section) { + virtual void ChangeSection(const MCSection *Section, + const MCExpr *Subsection) { // We have to keep track of the mapping symbol state of any sections we // use. Each one should start off as EMS_None, which is provided as the // default constructor by DenseMap::lookup. 
- LastMappingSymbols[getPreviousSection()] = LastEMS; + LastMappingSymbols[getPreviousSection().first] = LastEMS; LastEMS = LastMappingSymbols.lookup(Section); - MCELFStreamer::ChangeSection(Section); + MCELFStreamer::ChangeSection(Section, Subsection); } /// This function is the one used to emit instruction data into the ELF @@ -129,7 +130,7 @@ private: MCELF::SetType(SD, ELF::STT_NOTYPE); MCELF::SetBinding(SD, ELF::STB_LOCAL); SD.setExternal(false); - Symbol->setSection(*getCurrentSection()); + Symbol->setSection(*getCurrentSection().first); const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); Symbol->setVariableValue(Value); diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h index c0e3b29..d9798ae 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -133,6 +133,26 @@ public: return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx); } + static const AArch64MCExpr *CreateABS_G3(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_ABS_G3, Expr, Ctx); + } + + static const AArch64MCExpr *CreateABS_G2_NC(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_ABS_G2_NC, Expr, Ctx); + } + + static const AArch64MCExpr *CreateABS_G1_NC(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_ABS_G1_NC, Expr, Ctx); + } + + static const AArch64MCExpr *CreateABS_G0_NC(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_ABS_G0_NC, Expr, Ctx); + } + /// @} /// @name Accessors /// @{ diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 7960db0..819eead 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -81,6 +81,12 @@ static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, if (CM == CodeModel::Default) CM = CodeModel::Small; + else if (CM == CodeModel::JITDefault) { + // The default MCJIT memory managers make no guarantees about where they can + // find an executable page; JITed code needs to be able to refer to globals + // no matter how far away they are. 
+ CM = CodeModel::Large; + } X->InitMCCodeGenInfo(RM, CM, OL); return X; diff --git a/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp index b8099cb..fc706a4 100644 --- a/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp @@ -19,6 +19,6 @@ using namespace llvm; Target llvm::TheAArch64Target; extern "C" void LLVMInitializeAArch64TargetInfo() { - RegisterTarget<Triple::aarch64> + RegisterTarget<Triple::aarch64, /*HasJIT=*/true> X(TheAArch64Target, "aarch64", "AArch64"); } diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index 1678559..bedccb5 100644 --- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -981,8 +981,11 @@ bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) { Rotation = RepeatWidth - Rotation; } - uint64_t ReplicatedOnes = (ReplicatedMask >> Rotation) - | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask); + uint64_t ReplicatedOnes = ReplicatedMask; + if (Rotation != 0 && Rotation != 64) + ReplicatedOnes = (ReplicatedMask >> Rotation) + | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask); + // Of course, they may not actually be ones, so we have to check that: if (!isMask_64(ReplicatedOnes)) continue; @@ -1051,13 +1054,14 @@ bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits, int Rotation = (ImmR & (Width - 1)); uint64_t Mask = (1ULL << Num1s) - 1; uint64_t WidthMask = Width == 64 ? -1 : (1ULL << Width) - 1; - Mask = (Mask >> Rotation) - | ((Mask << (Width - Rotation)) & WidthMask); + if (Rotation != 0 && Rotation != 64) + Mask = (Mask >> Rotation) + | ((Mask << (Width - Rotation)) & WidthMask); - Imm = 0; - for (unsigned i = 0; i < RegWidth / Width; ++i) { - Imm |= Mask; + Imm = Mask; + for (unsigned i = 1; i < RegWidth / Width; ++i) { Mask <<= Width; + Imm |= Mask; } return true; diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 1b773d6..9a1ca61 100644 --- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -1037,7 +1037,14 @@ namespace AArch64II { // MO_LO12 - On a symbol operand, this represents a relocation containing // lower 12 bits of the address. Used in add/sub/ldr/str. - MO_LO12 + MO_LO12, + + // MO_ABS_G* - Represent the 16-bit granules of an absolute reference using + // movz/movk instructions. + MO_ABS_G3, + MO_ABS_G2_NC, + MO_ABS_G1_NC, + MO_ABS_G0_NC }; } |
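A note on the guards added in A64Imms::isLogicalImm and isLogicalImmBits: when Rotation is 0 (or 64) there is nothing to rotate, and the old unconditional expression shifted a 64-bit value by the full 64-bit width via the "<< (RepeatWidth - Rotation)" term, which is undefined behaviour in C++. The patch skips the rotate in that case. A self-contained sketch of the guarded rotate, with a hypothetical helper name rather than the actual patched code:

    #include <cstdint>

    // Rotate a mask right within the low Width bits, mirroring the guard in
    // isLogicalImmBits: a rotation of 0 (or of the full width) is a no-op,
    // and evaluating the shift expression for it would shift a 64-bit value
    // by 64 bits, which is undefined behaviour.
    static uint64_t rotateMaskRight(uint64_t Mask, unsigned Rotation,
                                    unsigned Width, uint64_t WidthMask) {
      if (Rotation == 0 || Rotation == Width)
        return Mask;
      return (Mask >> Rotation) | ((Mask << (Width - Rotation)) & WidthMask);
    }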