Diffstat (limited to 'contrib/llvm/lib/Target/X86/X86FastISel.cpp')
-rw-r--r--  contrib/llvm/lib/Target/X86/X86FastISel.cpp | 190
1 file changed, 125 insertions(+), 65 deletions(-)
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index c890fdd..527e5d5 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -180,44 +180,6 @@ private:
 
 } // end anonymous namespace.
 
-static std::pair<X86::CondCode, bool>
-getX86ConditionCode(CmpInst::Predicate Predicate) {
-  X86::CondCode CC = X86::COND_INVALID;
-  bool NeedSwap = false;
-  switch (Predicate) {
-  default: break;
-  // Floating-point Predicates
-  case CmpInst::FCMP_UEQ: CC = X86::COND_E;       break;
-  case CmpInst::FCMP_OLT: NeedSwap = true;        LLVM_FALLTHROUGH;
-  case CmpInst::FCMP_OGT: CC = X86::COND_A;       break;
-  case CmpInst::FCMP_OLE: NeedSwap = true;        LLVM_FALLTHROUGH;
-  case CmpInst::FCMP_OGE: CC = X86::COND_AE;      break;
-  case CmpInst::FCMP_UGT: NeedSwap = true;        LLVM_FALLTHROUGH;
-  case CmpInst::FCMP_ULT: CC = X86::COND_B;       break;
-  case CmpInst::FCMP_UGE: NeedSwap = true;        LLVM_FALLTHROUGH;
-  case CmpInst::FCMP_ULE: CC = X86::COND_BE;      break;
-  case CmpInst::FCMP_ONE: CC = X86::COND_NE;      break;
-  case CmpInst::FCMP_UNO: CC = X86::COND_P;       break;
-  case CmpInst::FCMP_ORD: CC = X86::COND_NP;      break;
-  case CmpInst::FCMP_OEQ: LLVM_FALLTHROUGH;
-  case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break;
-
-  // Integer Predicates
-  case CmpInst::ICMP_EQ:  CC = X86::COND_E;       break;
-  case CmpInst::ICMP_NE:  CC = X86::COND_NE;      break;
-  case CmpInst::ICMP_UGT: CC = X86::COND_A;       break;
-  case CmpInst::ICMP_UGE: CC = X86::COND_AE;      break;
-  case CmpInst::ICMP_ULT: CC = X86::COND_B;       break;
-  case CmpInst::ICMP_ULE: CC = X86::COND_BE;      break;
-  case CmpInst::ICMP_SGT: CC = X86::COND_G;       break;
-  case CmpInst::ICMP_SGE: CC = X86::COND_GE;      break;
-  case CmpInst::ICMP_SLT: CC = X86::COND_L;       break;
-  case CmpInst::ICMP_SLE: CC = X86::COND_LE;      break;
-  }
-
-  return std::make_pair(CC, NeedSwap);
-}
-
 static std::pair<unsigned, bool>
 getX86SSEConditionCode(CmpInst::Predicate Predicate) {
   unsigned CC;
@@ -367,6 +329,10 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
   switch (VT.getSimpleVT().SimpleTy) {
   default: return false;
   case MVT::i1:
+    // TODO: Support this properly.
+    if (Subtarget->hasAVX512())
+      return false;
+    LLVM_FALLTHROUGH;
   case MVT::i8:
     Opc = X86::MOV8rm;
     RC  = &X86::GR8RegClass;
@@ -448,6 +414,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
     assert(HasAVX);
     if (IsNonTemporal && Alignment >= 32 && HasAVX2)
       Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
+    else if (IsNonTemporal && Alignment >= 16)
+      return false; // Force split for X86::VMOVNTDQArm
     else if (Alignment >= 32)
       Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
     else
@@ -458,6 +426,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
     assert(HasAVX);
     if (IsNonTemporal && Alignment >= 32 && HasAVX2)
       Opc = X86::VMOVNTDQAYrm;
+    else if (IsNonTemporal && Alignment >= 16)
+      return false; // Force split for X86::VMOVNTDQArm
     else if (Alignment >= 32)
       Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
     else
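The function deleted in the first hunk is not dropped; it moves into the X86 backend proper, and every call site below switches to X86::getX86ConditionCode. A minimal caller-side sketch, assuming the LLVM 5.0-era X86 interface — the bool tells the caller to commute the compare operands before using the returned condition code, which is how the OLT/OLE/UGT/UGE predicates reuse the A/AE/B/BE codes:

    X86::CondCode CC;
    bool NeedSwap;
    std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
    if (NeedSwap)
      std::swap(CmpLHS, CmpRHS); // e.g. FCMP_OLT: swap, then test COND_A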
@@ -471,6 +441,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
     assert(HasAVX);
     if (IsNonTemporal && Alignment >= 32 && HasAVX2)
       Opc = X86::VMOVNTDQAYrm;
+    else if (IsNonTemporal && Alignment >= 16)
+      return false; // Force split for X86::VMOVNTDQArm
     else if (Alignment >= 32)
       Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
     else
@@ -524,6 +496,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
                                    X86AddressMode &AM,
                                    MachineMemOperand *MMO, bool Aligned) {
+  bool HasSSE1 = Subtarget->hasSSE1();
   bool HasSSE2 = Subtarget->hasSSE2();
   bool HasSSE4A = Subtarget->hasSSE4A();
   bool HasAVX = Subtarget->hasAVX();
@@ -537,6 +510,16 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
   case MVT::f80: // No f80 support yet.
   default: return false;
   case MVT::i1: {
+    // In case ValReg is a K register, COPY to a GPR
+    if (MRI.getRegClass(ValReg) == &X86::VK1RegClass) {
+      unsigned KValReg = ValReg;
+      ValReg = createResultReg(&X86::GR32RegClass);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::COPY), ValReg)
+          .addReg(KValReg);
+      ValReg = fastEmitInst_extractsubreg(MVT::i8, ValReg, /*Kill=*/true,
+                                          X86::sub_8bit);
+    }
     // Mask out all but lowest bit.
     unsigned AndResult = createResultReg(&X86::GR8RegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -574,6 +557,9 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
     } else
       Opc = X86::ST_Fp64m;
     break;
+  case MVT::x86mmx:
+    Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
+    break;
   case MVT::v4f32:
     if (Aligned) {
       if (IsNonTemporal)
@@ -1201,7 +1187,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
       CC != CallingConv::X86_StdCall &&
       CC != CallingConv::X86_ThisCall &&
       CC != CallingConv::X86_64_SysV &&
-      CC != CallingConv::X86_64_Win64)
+      CC != CallingConv::Win64)
     return false;
 
   // Don't handle popping bytes if they don't fit the ret's immediate.
@@ -1268,6 +1254,16 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
     if (SrcVT == MVT::i1) {
       if (Outs[0].Flags.isSExt())
         return false;
+      // In case SrcReg is a K register, COPY to a GPR
+      if (MRI.getRegClass(SrcReg) == &X86::VK1RegClass) {
+        unsigned KSrcReg = SrcReg;
+        SrcReg = createResultReg(&X86::GR32RegClass);
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+                TII.get(TargetOpcode::COPY), SrcReg)
+            .addReg(KSrcReg);
+        SrcReg = fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
+                                            X86::sub_8bit);
+      }
       SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
       SrcVT = MVT::i8;
     }
@@ -1531,7 +1527,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
 
   X86::CondCode CC;
   bool SwapArgs;
-  std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
+  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
   assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
 
   unsigned Opc = X86::getSETFromCond(CC);
@@ -1559,6 +1555,17 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
   // Handle zero-extension from i1 to i8, which is common.
   MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
   if (SrcVT == MVT::i1) {
+    // In case ResultReg is a K register, COPY to a GPR
+    if (MRI.getRegClass(ResultReg) == &X86::VK1RegClass) {
+      unsigned KResultReg = ResultReg;
+      ResultReg = createResultReg(&X86::GR32RegClass);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::COPY), ResultReg)
+          .addReg(KResultReg);
+      ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
+                                             X86::sub_8bit);
+    }
+
     // Set the high bits to zero.
     ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
     SrcVT = MVT::i8;
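The COPY-then-extract sequence added to the store, return, and zero-extend paths above recurs in the branch, select, and call-argument hunks below. A hypothetical helper showing its shape — the name emitKRegToGR8 and the standalone form are illustrative only; the commit open-codes the sequence at each site, inside X86FastISel where MRI, TII, and FuncInfo are members:

    // Move an AVX-512 VK1 mask bit into a GR8 virtual register. KMOV only
    // transfers masks to/from 32- and 64-bit GPRs, so copy to GR32 first,
    // then take the low byte for the TEST8ri/AND8ri consumers.
    unsigned emitKRegToGR8(unsigned Reg) {
      unsigned GR32 = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), GR32)
          .addReg(Reg);
      return fastEmitInst_extractsubreg(MVT::i8, GR32, /*Kill=*/true,
                                        X86::sub_8bit);
    }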
@@ -1658,7 +1665,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
 
       bool SwapArgs;
       unsigned BranchOpc;
-      std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
+      std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
       assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
 
       BranchOpc = X86::GetCondBranchFromCond(CC);
@@ -1740,10 +1747,12 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
   // In case OpReg is a K register, COPY to a GPR
   if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
     unsigned KOpReg = OpReg;
-    OpReg = createResultReg(&X86::GR8RegClass);
+    OpReg = createResultReg(&X86::GR32RegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
             TII.get(TargetOpcode::COPY), OpReg)
         .addReg(KOpReg);
+    OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
+                                       X86::sub_8bit);
   }
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
       .addReg(OpReg)
@@ -2029,7 +2038,7 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
     }
 
     bool NeedSwap;
-    std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate);
+    std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
     assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
 
     const Value *CmpLHS = CI->getOperand(0);
@@ -2084,10 +2093,12 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
     // In case OpReg is a K register, COPY to a GPR
     if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
       unsigned KCondReg = CondReg;
-      CondReg = createResultReg(&X86::GR8RegClass);
+      CondReg = createResultReg(&X86::GR32RegClass);
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
               TII.get(TargetOpcode::COPY), CondReg)
           .addReg(KCondReg, getKillRegState(CondIsKill));
+      CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
+                                           X86::sub_8bit);
     }
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
         .addReg(CondReg, getKillRegState(CondIsKill))
@@ -2106,7 +2117,8 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
   if (!LHSReg || !RHSReg)
     return false;
 
-  unsigned Opc = X86::getCMovFromCond(CC, RC->getSize());
+  const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
+  unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8);
   unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
                                        LHSReg, LHSIsKill);
   updateValueMap(I, ResultReg);
@@ -2275,7 +2287,7 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
   const auto *CI = dyn_cast<CmpInst>(Cond);
   if (CI && (CI->getParent() == I->getParent())) {
     bool NeedSwap;
-    std::tie(CC, NeedSwap) = getX86ConditionCode(CI->getPredicate());
+    std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
     if (CC > X86::LAST_VALID_COND)
       return false;
 
@@ -2297,10 +2309,12 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
     // In case OpReg is a K register, COPY to a GPR
     if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
       unsigned KCondReg = CondReg;
-      CondReg = createResultReg(&X86::GR8RegClass);
+      CondReg = createResultReg(&X86::GR32RegClass);
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
               TII.get(TargetOpcode::COPY), CondReg)
          .addReg(KCondReg, getKillRegState(CondIsKill));
+      CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
+                                           X86::sub_8bit);
     }
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
         .addReg(CondReg, getKillRegState(CondIsKill))
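Two API shifts land in these hunks besides the namespace change: the K-register copies now go through GR32 instead of GR8 (see the note above — a GR8 destination left the copy with no direct KMOV lowering), and TargetRegisterClass::getSize() is replaced by a TargetRegisterInfo query. A sketch of the latter, assuming the renamed LLVM 5.0-era API:

    const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
    // getRegSizeInBits() reports bits, while X86::getCMovFromCond still
    // takes bytes (2, 4, or 8 selecting CMOV16/32/64), hence the /8.
    unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC) / 8);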
@@ -2423,12 +2437,22 @@ bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
 
   if (OpReg == 0)
     return false;
 
+  unsigned ImplicitDefReg;
+  if (Subtarget->hasAVX()) {
+    ImplicitDefReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
+
+  }
+
   unsigned ResultReg = createResultReg(RC);
   MachineInstrBuilder MIB;
   MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
                 ResultReg);
+  if (Subtarget->hasAVX())
-  MIB.addReg(OpReg);
+    MIB.addReg(ImplicitDefReg);
+  MIB.addReg(OpReg);
 
   updateValueMap(I, ResultReg);
   return true;
@@ -2461,7 +2485,8 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
   EVT DstVT = TLI.getValueType(DL, I->getType());
 
   // This code only handles truncation to byte.
-  if (DstVT != MVT::i8 && DstVT != MVT::i1)
+  // TODO: Support truncate to i1 with AVX512.
+  if (DstVT != MVT::i8 && (DstVT != MVT::i1 || Subtarget->hasAVX512()))
     return false;
   if (!TLI.isTypeLegal(SrcVT))
     return false;
@@ -3014,19 +3039,19 @@ bool X86FastISel::fastLowerArguments() {
   if (!Subtarget->is64Bit())
     return false;
 
+  if (Subtarget->useSoftFloat())
+    return false;
+
   // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments.
   unsigned GPRCnt = 0;
   unsigned FPRCnt = 0;
-  unsigned Idx = 0;
   for (auto const &Arg : F->args()) {
-    // The first argument is at index 1.
-    ++Idx;
-    if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) ||
-        F->getAttributes().hasAttribute(Idx, Attribute::Nest))
+    if (Arg.hasAttribute(Attribute::ByVal) ||
+        Arg.hasAttribute(Attribute::InReg) ||
+        Arg.hasAttribute(Attribute::StructRet) ||
+        Arg.hasAttribute(Attribute::SwiftSelf) ||
+        Arg.hasAttribute(Attribute::SwiftError) ||
+        Arg.hasAttribute(Attribute::Nest))
       return false;
 
     Type *ArgTy = Arg.getType();
@@ -3105,8 +3130,8 @@ static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
     return 0;
 
   if (CS)
-    if (CS->arg_empty() || !CS->paramHasAttr(1, Attribute::StructRet) ||
-        CS->paramHasAttr(1, Attribute::InReg) || Subtarget->isTargetMCU())
+    if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) ||
+        CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
       return 0;
 
   return 4;
@@ -3127,6 +3152,15 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
   bool Is64Bit      = Subtarget->is64Bit();
   bool IsWin64      = Subtarget->isCallingConvWin64(CC);
 
+  const CallInst *CI =
+      CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
+  const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr;
+
+  // Functions with no_caller_saved_registers that need special handling.
+  if ((CI && CI->hasFnAttr("no_caller_saved_registers")) ||
+      (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
+    return false;
+
   // Handle only C, fastcc, and webkit_js calling conventions for now.
   switch (CC) {
   default: return false;
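The IMPLICIT_DEF added to X86SelectFPExtOrFPTrunc reflects that VEX-encoded scalar converts are two-source instructions: the untouched upper lanes of the destination come from the first source operand. A sketch of the resulting sequence for an AVX fpext, assuming TargetOpc is X86::VCVTSS2SDrr — feeding an IMPLICIT_DEF marks those lanes as don't-care instead of creating a false dependency on whatever register the allocator would otherwise pick:

    unsigned ImplicitDefReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(X86::VCVTSS2SDrr), ResultReg)
        .addReg(ImplicitDefReg) // pass-through for the upper lanes
        .addReg(OpReg);         // the f32 value being widened

The remaining hunks here track two interface cleanups: per-argument attributes are queried directly via Argument::hasAttribute rather than through indices into the function's attribute list, and CallSite::paramHasAttr switches from 1-based to 0-based argument numbering.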
@@ -3137,7 +3171,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
   case CallingConv::X86_FastCall:
   case CallingConv::X86_StdCall:
   case CallingConv::X86_ThisCall:
-  case CallingConv::X86_64_Win64:
+  case CallingConv::Win64:
   case CallingConv::X86_64_SysV:
     break;
   }
@@ -3230,7 +3264,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
   // Issue CALLSEQ_START
   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
-    .addImm(NumBytes).addImm(0);
+    .addImm(NumBytes).addImm(0).addImm(0);
 
   // Walk the register/memloc assignments, inserting copies/loads.
   const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
@@ -3266,6 +3300,16 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
 
     // Handle zero-extension from i1 to i8, which is common.
     if (ArgVT == MVT::i1) {
+      // In case SrcReg is a K register, COPY to a GPR
+      if (MRI.getRegClass(ArgReg) == &X86::VK1RegClass) {
+        unsigned KArgReg = ArgReg;
+        ArgReg = createResultReg(&X86::GR32RegClass);
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+                TII.get(TargetOpcode::COPY), ArgReg)
+            .addReg(KArgReg);
+        ArgReg = fastEmitInst_extractsubreg(MVT::i8, ArgReg, /*Kill=*/true,
+                                            X86::sub_8bit);
+      }
       // Set the high bits to zero.
       ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
       ArgVT = MVT::i8;
@@ -3463,6 +3507,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
     CCValAssign &VA = RVLocs[i];
     EVT CopyVT = VA.getValVT();
     unsigned CopyReg = ResultReg + i;
+    unsigned SrcReg = VA.getLocReg();
 
     // If this is x86-64, and we disabled SSE, we can't return FP values
    if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
@@ -3470,9 +3515,19 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
       report_fatal_error("SSE register return with SSE disabled");
     }
 
+    // If the return value is an i1 and AVX-512 is enabled, we need
+    // to do a fixup to make the copy legal.
+    if (CopyVT == MVT::i1 && SrcReg == X86::AL && Subtarget->hasAVX512()) {
+      // Need to copy to a GR32 first.
+      // TODO: MOVZX isn't great here. We don't care about the upper bits.
+      SrcReg = createResultReg(&X86::GR32RegClass);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(X86::MOVZX32rr8), SrcReg).addReg(X86::AL);
+    }
+
     // If we prefer to use the value in xmm registers, copy it out as f80 and
     // use a truncate to move it from fp stack reg to xmm reg.
-    if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
+    if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
         isScalarFPTypeInSSEReg(VA.getValVT())) {
       CopyVT = MVT::f80;
       CopyReg = createResultReg(&X86::RFP80RegClass);
@@ -3480,7 +3535,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
 
     // Copy out the result.
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-            TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg());
+            TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
     InRegs.push_back(VA.getLocReg());
 
     // Round the f80 to the right size, which also moves it to the appropriate
@@ -3622,7 +3677,12 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
   unsigned Opc = 0;
   switch (VT.SimpleTy) {
   default: llvm_unreachable("Unexpected value type");
-  case MVT::i1:  VT = MVT::i8; LLVM_FALLTHROUGH;
+  case MVT::i1:
+    // TODO: Support this properly.
+    if (Subtarget->hasAVX512())
+      return 0;
+    VT = MVT::i8;
+    LLVM_FALLTHROUGH;
   case MVT::i8:  Opc = X86::MOV8ri;  break;
   case MVT::i16: Opc = X86::MOV16ri; break;
   case MVT::i32: Opc = X86::MOV32ri; break;
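The call-lowering hunks adapt to three upstream changes: CallingConv::X86_64_Win64 is renamed CallingConv::Win64, CALLSEQ_START grows a third immediate operand (hence the extra .addImm(0)), and an i1 returned in AL may need to reach an AVX-512 mask register, where a direct AL-to-VK1 COPY cannot be selected. A condensed sketch of that last fixup, assuming the surrounding fastLowerCall context:

    unsigned SrcReg = VA.getLocReg(); // AL for a bool return
    if (CopyVT == MVT::i1 && SrcReg == X86::AL && Subtarget->hasAVX512()) {
      // Widen AL into a GR32 so the following COPY is legal; MOVZX also
      // clears the upper bits, which is more than strictly needed (see
      // the TODO in the hunk above).
      SrcReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(X86::MOVZX32rr8), SrcReg)
          .addReg(X86::AL);
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), CopyReg)
        .addReg(SrcReg);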