summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/X86/X86FastISel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/X86/X86FastISel.cpp')
-rw-r--r--contrib/llvm/lib/Target/X86/X86FastISel.cpp401
1 files changed, 309 insertions, 92 deletions
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index 688a544..3dc75d7 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -37,6 +37,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -58,8 +59,8 @@ class X86FastISel final : public FastISel {
public:
explicit X86FastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo)
- : FastISel(funcInfo, libInfo) {
- Subtarget = &TM.getSubtarget<X86Subtarget>();
+ : FastISel(funcInfo, libInfo) {
+ Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
}
@@ -80,15 +81,15 @@ public:
#include "X86GenFastISel.inc"
private:
- bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
+ bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT, DebugLoc DL);
- bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO,
- unsigned &ResultReg);
+ bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
+ unsigned &ResultReg, unsigned Alignment = 1);
- bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
+ bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
MachineMemOperand *MMO = nullptr, bool Aligned = false);
bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
- const X86AddressMode &AM,
+ X86AddressMode &AM,
MachineMemOperand *MMO = nullptr, bool Aligned = false);
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
@@ -123,11 +124,15 @@ private:
bool X86SelectTrunc(const Instruction *I);
+ bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
+ const TargetRegisterClass *RC);
+
bool X86SelectFPExt(const Instruction *I);
bool X86SelectFPTrunc(const Instruction *I);
+ bool X86SelectSIToFP(const Instruction *I);
const X86InstrInfo *getInstrInfo() const {
- return getTargetMachine()->getSubtargetImpl()->getInstrInfo();
+ return Subtarget->getInstrInfo();
}
const X86TargetMachine *getTargetMachine() const {
return static_cast<const X86TargetMachine *>(&TM);
@@ -160,6 +165,9 @@ private:
bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
const Value *Cond);
+
+ const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
+ X86AddressMode &AM);
};
} // end anonymous namespace.
@@ -237,6 +245,20 @@ getX86SSEConditionCode(CmpInst::Predicate Predicate) {
return std::make_pair(CC, NeedSwap);
}
+/// \brief Adds a complex addressing mode to the given machine instr builder.
+/// Note, this will constrain the index register. If its not possible to
+/// constrain the given index register, then a new one will be created. The
+/// IndexReg field of the addressing mode will be updated to match in this case.
+const MachineInstrBuilder &
+X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
+ X86AddressMode &AM) {
+ // First constrain the index register. It needs to be a GR64_NOSP.
+ AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
+ MIB->getNumOperands() +
+ X86::AddrIndexReg);
+ return ::addFullAddress(MIB, AM);
+}
+
/// \brief Check if it is possible to fold the condition from the XALU intrinsic
/// into the user. The condition code will only be updated on success.
bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
@@ -321,8 +343,9 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
-bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
- MachineMemOperand *MMO, unsigned &ResultReg) {
+bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
+ MachineMemOperand *MMO, unsigned &ResultReg,
+ unsigned Alignment) {
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = nullptr;
@@ -367,6 +390,30 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
case MVT::f80:
// No f80 support yet.
return false;
+ case MVT::v4f32:
+ if (Alignment >= 16)
+ Opc = Subtarget->hasAVX() ? X86::VMOVAPSrm : X86::MOVAPSrm;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVUPSrm : X86::MOVUPSrm;
+ RC = &X86::VR128RegClass;
+ break;
+ case MVT::v2f64:
+ if (Alignment >= 16)
+ Opc = Subtarget->hasAVX() ? X86::VMOVAPDrm : X86::MOVAPDrm;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVUPDrm : X86::MOVUPDrm;
+ RC = &X86::VR128RegClass;
+ break;
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (Alignment >= 16)
+ Opc = Subtarget->hasAVX() ? X86::VMOVDQArm : X86::MOVDQArm;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVDQUrm : X86::MOVDQUrm;
+ RC = &X86::VR128RegClass;
+ break;
}
ResultReg = createResultReg(RC);
@@ -383,7 +430,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
/// and a displacement offset, or a GlobalAddress,
/// i.e. V. Return true if it is possible.
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
- const X86AddressMode &AM,
+ X86AddressMode &AM,
MachineMemOperand *MMO, bool Aligned) {
// Get opcode and regclass of the output for the given store instruction.
unsigned Opc = 0;
@@ -444,7 +491,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
}
bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
- const X86AddressMode &AM,
+ X86AddressMode &AM,
MachineMemOperand *MMO, bool Aligned) {
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Val))
@@ -1063,8 +1110,14 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) {
if (!X86SelectAddress(Ptr, AM))
return false;
+ unsigned Alignment = LI->getAlignment();
+ unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = ABIAlignment;
+
unsigned ResultReg = 0;
- if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg))
+ if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
+ Alignment))
return false;
updateValueMap(I, ResultReg);
@@ -1089,27 +1142,37 @@ static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
}
}
-/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS
-/// of the comparison, return an opcode that works for the compare (e.g.
-/// CMP32ri) otherwise return 0.
+/// If we have a comparison with RHS as the RHS of the comparison, return an
+/// opcode that works for the compare (e.g. CMP32ri) otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
+ int64_t Val = RHSC->getSExtValue();
switch (VT.getSimpleVT().SimpleTy) {
// Otherwise, we can't fold the immediate into this comparison.
- default: return 0;
- case MVT::i8: return X86::CMP8ri;
- case MVT::i16: return X86::CMP16ri;
- case MVT::i32: return X86::CMP32ri;
+ default:
+ return 0;
+ case MVT::i8:
+ return X86::CMP8ri;
+ case MVT::i16:
+ if (isInt<8>(Val))
+ return X86::CMP16ri8;
+ return X86::CMP16ri;
+ case MVT::i32:
+ if (isInt<8>(Val))
+ return X86::CMP32ri8;
+ return X86::CMP32ri;
case MVT::i64:
+ if (isInt<8>(Val))
+ return X86::CMP64ri8;
// 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
// field.
- if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
+ if (isInt<32>(Val))
return X86::CMP64ri32;
return 0;
}
}
bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
- EVT VT) {
+ EVT VT, DebugLoc CurDbgLoc) {
unsigned Op0Reg = getRegForValue(Op0);
if (Op0Reg == 0) return false;
@@ -1122,7 +1185,7 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
// CMPri, otherwise use CMPrr.
if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CompareImmOpc))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
.addReg(Op0Reg)
.addImm(Op1C->getSExtValue());
return true;
@@ -1134,7 +1197,7 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
unsigned Op1Reg = getRegForValue(Op1);
if (Op1Reg == 0) return false;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CompareOpc))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
.addReg(Op0Reg)
.addReg(Op1Reg);
@@ -1202,7 +1265,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
ResultReg = createResultReg(&X86::GR8RegClass);
if (SETFOpc) {
- if (!X86FastEmitCompare(LHS, RHS, VT))
+ if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
return false;
unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
@@ -1227,7 +1290,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
std::swap(LHS, RHS);
// Emit a compare of LHS/RHS.
- if (!X86FastEmitCompare(LHS, RHS, VT))
+ if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
@@ -1353,7 +1416,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
std::swap(CmpLHS, CmpRHS);
// Emit a compare of the LHS and RHS, setting the flags.
- if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT))
+ if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
@@ -1740,7 +1803,7 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
EVT CmpVT = TLI.getValueType(CmpLHS->getType());
// Emit a compare of the LHS and RHS, setting the flags.
- if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
+ if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
return false;
if (SETFOpc) {
@@ -1805,11 +1868,11 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
return true;
}
-/// \brief Emit SSE instructions to lower the select.
+/// \brief Emit SSE or AVX instructions to lower the select.
///
/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
-/// SSE instructions are available.
+/// SSE instructions are available. If AVX is available, try to use a VBLENDV.
bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
// Optimize conditions coming from a compare if both instructions are in the
// same basic block (values defined in other basic blocks may not have
@@ -1845,19 +1908,17 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
if (NeedSwap)
std::swap(CmpLHS, CmpRHS);
- static unsigned OpcTable[2][2][4] = {
- { { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
- { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr } },
- { { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr },
- { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr } }
+ // Choose the SSE instruction sequence based on data type (float or double).
+ static unsigned OpcTable[2][4] = {
+ { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
+ { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr }
};
- bool HasAVX = Subtarget->hasAVX();
unsigned *Opc = nullptr;
switch (RetVT.SimpleTy) {
default: return false;
- case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break;
- case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break;
+ case MVT::f32: Opc = &OpcTable[0][0]; break;
+ case MVT::f64: Opc = &OpcTable[1][0]; break;
}
const Value *LHS = I->getOperand(1);
@@ -1879,14 +1940,33 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
return false;
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
- unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
- CmpRHSReg, CmpRHSIsKill, CC);
- unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
- LHSReg, LHSIsKill);
- unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
- RHSReg, RHSIsKill);
- unsigned ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
- AndReg, /*IsKill=*/true);
+ unsigned ResultReg;
+
+ if (Subtarget->hasAVX()) {
+ // If we have AVX, create 1 blendv instead of 3 logic instructions.
+ // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
+ // uses XMM0 as the selection register. That may need just as many
+ // instructions as the AND/ANDN/OR sequence due to register moves, so
+ // don't bother.
+ unsigned CmpOpcode =
+ (RetVT.SimpleTy == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
+ unsigned BlendOpcode =
+ (RetVT.SimpleTy == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
+
+ unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
+ CmpRHSReg, CmpRHSIsKill, CC);
+ ResultReg = fastEmitInst_rrr(BlendOpcode, RC, RHSReg, RHSIsKill,
+ LHSReg, LHSIsKill, CmpReg, true);
+ } else {
+ unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
+ CmpRHSReg, CmpRHSIsKill, CC);
+ unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
+ LHSReg, LHSIsKill);
+ unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
+ RHSReg, RHSIsKill);
+ ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
+ AndReg, /*IsKill=*/true);
+ }
updateValueMap(I, ResultReg);
return true;
}
@@ -1924,7 +2004,7 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
std::swap(CmpLHS, CmpRHS);
EVT CmpVT = TLI.getValueType(CmpLHS->getType());
- if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
+ if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
return false;
} else {
unsigned CondReg = getRegForValue(Cond);
@@ -2001,41 +2081,84 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
return false;
}
+bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
+ // The target-independent selection algorithm in FastISel already knows how
+ // to select a SINT_TO_FP if the target is SSE but not AVX.
+ // Early exit if the subtarget doesn't have AVX.
+ if (!Subtarget->hasAVX())
+ return false;
+
+ if (!I->getOperand(0)->getType()->isIntegerTy(32))
+ return false;
+
+ // Select integer to float/double conversion.
+ unsigned OpReg = getRegForValue(I->getOperand(0));
+ if (OpReg == 0)
+ return false;
+
+ const TargetRegisterClass *RC = nullptr;
+ unsigned Opcode;
+
+ if (I->getType()->isDoubleTy()) {
+ // sitofp int -> double
+ Opcode = X86::VCVTSI2SDrr;
+ RC = &X86::FR64RegClass;
+ } else if (I->getType()->isFloatTy()) {
+ // sitofp int -> float
+ Opcode = X86::VCVTSI2SSrr;
+ RC = &X86::FR32RegClass;
+ } else
+ return false;
+
+ unsigned ImplicitDefReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
+ unsigned ResultReg =
+ fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+// Helper method used by X86SelectFPExt and X86SelectFPTrunc.
+bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
+ unsigned TargetOpc,
+ const TargetRegisterClass *RC) {
+ assert((I->getOpcode() == Instruction::FPExt ||
+ I->getOpcode() == Instruction::FPTrunc) &&
+ "Instruction must be an FPExt or FPTrunc!");
+
+ unsigned OpReg = getRegForValue(I->getOperand(0));
+ if (OpReg == 0)
+ return false;
+
+ unsigned ResultReg = createResultReg(RC);
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
+ ResultReg);
+ if (Subtarget->hasAVX())
+ MIB.addReg(OpReg);
+ MIB.addReg(OpReg);
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
bool X86FastISel::X86SelectFPExt(const Instruction *I) {
- // fpext from float to double.
- if (X86ScalarSSEf64 &&
- I->getType()->isDoubleTy()) {
- const Value *V = I->getOperand(0);
- if (V->getType()->isFloatTy()) {
- unsigned OpReg = getRegForValue(V);
- if (OpReg == 0) return false;
- unsigned ResultReg = createResultReg(&X86::FR64RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(X86::CVTSS2SDrr), ResultReg)
- .addReg(OpReg);
- updateValueMap(I, ResultReg);
- return true;
- }
+ if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
+ I->getOperand(0)->getType()->isFloatTy()) {
+ // fpext from float to double.
+ unsigned Opc = Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
+ return X86SelectFPExtOrFPTrunc(I, Opc, &X86::FR64RegClass);
}
return false;
}
bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
- if (X86ScalarSSEf64) {
- if (I->getType()->isFloatTy()) {
- const Value *V = I->getOperand(0);
- if (V->getType()->isDoubleTy()) {
- unsigned OpReg = getRegForValue(V);
- if (OpReg == 0) return false;
- unsigned ResultReg = createResultReg(&X86::FR32RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(X86::CVTSD2SSrr), ResultReg)
- .addReg(OpReg);
- updateValueMap(I, ResultReg);
- return true;
- }
- }
+ if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
+ I->getOperand(0)->getType()->isDoubleTy()) {
+ // fptrunc from double to float.
+ unsigned Opc = Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
+ return X86SelectFPExtOrFPTrunc(I, Opc, &X86::FR32RegClass);
}
return false;
@@ -2062,6 +2185,7 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
return true;
}
+ bool KillInputReg = false;
if (!Subtarget->is64Bit()) {
// If we're on x86-32; we can't extract an i8 from a general register.
// First issue a copy to GR16_ABCD or GR32_ABCD.
@@ -2071,11 +2195,12 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), CopyReg).addReg(InputReg);
InputReg = CopyReg;
+ KillInputReg = true;
}
// Issue an extract_subreg.
unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
- InputReg, /*Kill=*/true,
+ InputReg, KillInputReg,
X86::sub_8bit);
if (!ResultReg)
return false;
@@ -2127,7 +2252,73 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// FIXME: Handle more intrinsics.
switch (II->getIntrinsicID()) {
default: return false;
+ case Intrinsic::convert_from_fp16:
+ case Intrinsic::convert_to_fp16: {
+ if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
+ return false;
+
+ const Value *Op = II->getArgOperand(0);
+ unsigned InputReg = getRegForValue(Op);
+ if (InputReg == 0)
+ return false;
+
+ // F16C only allows converting from float to half and from half to float.
+ bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
+ if (IsFloatToHalf) {
+ if (!Op->getType()->isFloatTy())
+ return false;
+ } else {
+ if (!II->getType()->isFloatTy())
+ return false;
+ }
+
+ unsigned ResultReg = 0;
+ const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
+ if (IsFloatToHalf) {
+ // 'InputReg' is implicitly promoted from register class FR32 to
+ // register class VR128 by method 'constrainOperandRegClass' which is
+ // directly called by 'fastEmitInst_ri'.
+ // Instruction VCVTPS2PHrr takes an extra immediate operand which is
+ // used to provide rounding control.
+ InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 0);
+
+ // Move the lower 32-bits of ResultReg to another register of class GR32.
+ ResultReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(X86::VMOVPDI2DIrr), ResultReg)
+ .addReg(InputReg, RegState::Kill);
+
+ // The result value is in the lower 16-bits of ResultReg.
+ unsigned RegIdx = X86::sub_16bit;
+ ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
+ } else {
+ assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
+ // Explicitly sign-extend the input to 32-bit.
+ InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg,
+ /*Kill=*/false);
+
+ // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
+ InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
+ InputReg, /*Kill=*/true);
+
+ InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);
+
+ // The result value is in the lower 32-bits of ResultReg.
+ // Emit an explicit copy from register class VR128 to register class FR32.
+ ResultReg = createResultReg(&X86::FR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(InputReg, RegState::Kill);
+ }
+
+ updateValueMap(II, ResultReg);
+ return true;
+ }
case Intrinsic::frameaddress: {
+ MachineFunction *MF = FuncInfo.MF;
+ if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
+ return false;
+
Type *RetTy = II->getCalledFunction()->getReturnType();
MVT VT;
@@ -2145,12 +2336,11 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// This needs to be set before we call getPtrSizedFrameRegister, otherwise
// we get the wrong frame register.
- MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
+ MachineFrameInfo *MFI = MF->getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
- unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*(FuncInfo.MF));
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
(FrameReg == X86::EBP && VT == MVT::i32)) &&
"Invalid Frame Register!");
@@ -2247,6 +2437,8 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
// FIXME may need to add RegState::Debug to any registers produced,
// although ESP/EBP should be the only ones at the moment.
+ assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
+ "Expected inlined-at fields to agree");
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
.addImm(0)
.addMetadata(DI->getVariable())
@@ -2738,8 +2930,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
.addImm(NumBytes).addImm(0);
// Walk the register/memloc assignments, inserting copies/loads.
- const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
- TM.getSubtargetImpl()->getRegisterInfo());
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign const &VA = ArgLocs[i];
const Value *ArgVal = OutVals[VA.getValNo()];
@@ -2870,7 +3061,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
- unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
assert((Subtarget->hasSSE1() || !NumXMMRegs)
&& "SSE registers cannot be used when SSE is disabled");
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
@@ -2934,7 +3125,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Add a register mask operand representing the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
// Add an implicit use GOT pointer in EBX.
if (Subtarget->isPICStyleGOT())
@@ -3044,6 +3235,8 @@ X86FastISel::fastSelectInstruction(const Instruction *I) {
return X86SelectFPExt(I);
case Instruction::FPTrunc:
return X86SelectFPTrunc(I);
+ case Instruction::SIToFP:
+ return X86SelectSIToFP(I);
case Instruction::IntToPtr: // Deliberate fall-through.
case Instruction::PtrToInt: {
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
@@ -3189,8 +3382,8 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
TII.get(Opc), ResultReg);
addDirectMem(MIB, AddrReg);
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
- TM.getSubtargetImpl()->getDataLayout()->getPointerSize(), Align);
+ MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
+ TM.getDataLayout()->getPointerSize(), Align);
MIB->addMemOperand(*FuncInfo.MF, MMO);
return ResultReg;
}
@@ -3224,7 +3417,10 @@ unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
ResultReg)
.addGlobalAddress(GV);
} else {
- unsigned Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
+ unsigned Opc = TLI.getPointerTy() == MVT::i32
+ ? (Subtarget->isTarget64BitILP32()
+ ? X86::LEA64_32r : X86::LEA32r)
+ : X86::LEA64r;
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg), AM);
}
@@ -3266,7 +3462,10 @@ unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
X86AddressMode AM;
if (!X86SelectAddress(C, AM))
return 0;
- unsigned Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
+ unsigned Opc = TLI.getPointerTy() == MVT::i32
+ ? (Subtarget->isTarget64BitILP32()
+ ? X86::LEA64_32r : X86::LEA32r)
+ : X86::LEA64r;
const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
unsigned ResultReg = createResultReg(RC);
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -3331,14 +3530,32 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
SmallVector<MachineOperand, 8> AddrOps;
AM.getFullAddress(AddrOps);
- MachineInstr *Result =
- XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps,
- Size, Alignment, /*AllowCommute=*/true);
+ MachineInstr *Result = XII.foldMemoryOperandImpl(
+ *FuncInfo.MF, MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
+ /*AllowCommute=*/true);
if (!Result)
return false;
+ // The index register could be in the wrong register class. Unfortunately,
+ // foldMemoryOperandImpl could have commuted the instruction so its not enough
+ // to just look at OpNo + the offset to the index reg. We actually need to
+ // scan the instruction to find the index reg and see if its the correct reg
+ // class.
+ unsigned OperandNo = 0;
+ for (MachineInstr::mop_iterator I = Result->operands_begin(),
+ E = Result->operands_end(); I != E; ++I, ++OperandNo) {
+ MachineOperand &MO = *I;
+ if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
+ continue;
+ // Found the index reg, now try to rewrite it.
+ unsigned IndexReg = constrainOperandRegClass(Result->getDesc(),
+ MO.getReg(), OperandNo);
+ if (IndexReg == MO.getReg())
+ continue;
+ MO.setReg(IndexReg);
+ }
+
Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
- FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
MI->eraseFromParent();
return true;
}
OpenPOWER on IntegriCloud