Diffstat (limited to 'contrib/llvm/lib/Target/X86/X86FastISel.cpp')
-rw-r--r--  contrib/llvm/lib/Target/X86/X86FastISel.cpp  463
1 file changed, 295 insertions(+), 168 deletions(-)
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index 725d3fa..97f96ab 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "X86.h"
+#include "X86CallingConv.h"
#include "X86ISelLowering.h"
#include "X86InstrBuilder.h"
#include "X86RegisterInfo.h"
@@ -45,10 +46,6 @@ class X86FastISel : public FastISel {
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
- /// RegInfo - X86 register info.
- ///
- const X86RegisterInfo *RegInfo;
-
/// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
/// floating point ops.
/// When SSE is available, use it for f32 operations.
@@ -63,7 +60,6 @@ public:
Subtarget = &TM.getSubtarget<X86Subtarget>();
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
- RegInfo = static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
}
virtual bool TargetSelectInstruction(const Instruction *I);
@@ -84,8 +80,10 @@ private:
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
- bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
- bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);
+ bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
+ bool Aligned = false);
+ bool X86FastEmitStore(EVT VT, unsigned ValReg, const X86AddressMode &AM,
+ bool Aligned = false);
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
unsigned &ResultReg);
@@ -128,6 +126,8 @@ private:
return static_cast<const X86TargetMachine *>(&TM);
}
+ bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
+
unsigned TargetMaterializeConstant(const Constant *C);
unsigned TargetMaterializeAlloca(const AllocaInst *C);
@@ -238,7 +238,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
/// and a displacement offset, or a GlobalAddress,
/// i.e. V. Return true if it is possible.
bool
-X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
+X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
+ const X86AddressMode &AM, bool Aligned) {
// Get opcode and regclass of the output for the given store instruction.
unsigned Opc = 0;
switch (VT.getSimpleVT().SimpleTy) {
@@ -248,8 +249,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
// Mask out all but lowest bit.
unsigned AndResult = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
- Val = AndResult;
+ TII.get(X86::AND8ri), AndResult).addReg(ValReg).addImm(1);
+ ValReg = AndResult;
}
// FALLTHROUGH, handling i1 as i8.
case MVT::i8: Opc = X86::MOV8mr; break;
@@ -265,26 +266,35 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
(Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
break;
case MVT::v4f32:
- Opc = X86::MOVAPSmr;
+ if (Aligned)
+ Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
break;
case MVT::v2f64:
- Opc = X86::MOVAPDmr;
+ if (Aligned)
+ Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr;
break;
case MVT::v4i32:
case MVT::v2i64:
case MVT::v8i16:
case MVT::v16i8:
- Opc = X86::MOVDQAmr;
+ if (Aligned)
+ Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
break;
}
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DL, TII.get(Opc)), AM).addReg(Val);
+ DL, TII.get(Opc)), AM).addReg(ValReg);
return true;
}
bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
- const X86AddressMode &AM) {
+ const X86AddressMode &AM, bool Aligned) {
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Val))
Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
@@ -319,7 +329,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
if (ValReg == 0)
return false;
- return X86FastEmitStore(VT, ValReg, AM);
+ return X86FastEmitStore(VT, ValReg, AM, Aligned);
}
/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
@@ -337,9 +347,126 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
return true;
}
+bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
+ // Handle constant address.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return false;
+
+ // Can't handle TLS yet.
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->isThreadLocal())
+ return false;
+
+ // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how
+ // it works...).
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ if (const GlobalVariable *GVar =
+ dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)))
+ if (GVar->isThreadLocal())
+ return false;
+
+ // RIP-relative addresses can't have additional register operands, so if
+ // we've already folded stuff into the addressing mode, just force the
+ // global value into its own register, which we can use as the basereg.
+ if (!Subtarget->isPICStyleRIPRel() ||
+ (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
+ // Okay, we've committed to selecting this global. Set up the address.
+ AM.GV = GV;
+
+ // Allow the subtarget to classify the global.
+ unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
+
+ // If this reference is relative to the pic base, set it now.
+ if (isGlobalRelativeToPICBase(GVFlags)) {
+ // FIXME: How do we know Base.Reg is free??
+ AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ }
+
+ // Unless the ABI requires an extra load, return a direct reference to
+ // the global.
+ if (!isGlobalStubReference(GVFlags)) {
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
+ }
+ AM.GVOpFlags = GVFlags;
+ return true;
+ }
+
+ // Ok, we need to do a load from a stub. If we've already loaded from
+ // this stub, reuse the loaded pointer, otherwise emit the load now.
+ DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
+ unsigned LoadReg;
+ if (I != LocalValueMap.end() && I->second != 0) {
+ LoadReg = I->second;
+ } else {
+ // Issue load from stub.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ X86AddressMode StubAM;
+ StubAM.Base.Reg = AM.Base.Reg;
+ StubAM.GV = GV;
+ StubAM.GVOpFlags = GVFlags;
+
+ // Prepare for inserting code in the local-value area.
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
+ if (TLI.getPointerTy() == MVT::i64) {
+ Opc = X86::MOV64rm;
+ RC = &X86::GR64RegClass;
+
+ if (Subtarget->isPICStyleRIPRel())
+ StubAM.Base.Reg = X86::RIP;
+ } else {
+ Opc = X86::MOV32rm;
+ RC = &X86::GR32RegClass;
+ }
+
+ LoadReg = createResultReg(RC);
+ MachineInstrBuilder LoadMI =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
+ addFullAddress(LoadMI, StubAM);
+
+ // Ok, back to normal mode.
+ leaveLocalValueArea(SaveInsertPt);
+
+ // Prevent loading GV stub multiple times in same MBB.
+ LocalValueMap[V] = LoadReg;
+ }
+
+ // Now construct the final address. Note that the Disp, Scale,
+ // and Index values may already be set here.
+ AM.Base.Reg = LoadReg;
+ AM.GV = 0;
+ return true;
+ }
+ }
+
+ // If all else fails, try to materialize the value in a register.
+ if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
+ if (AM.Base.Reg == 0) {
+ AM.Base.Reg = getRegForValue(V);
+ return AM.Base.Reg != 0;
+ }
+ if (AM.IndexReg == 0) {
+ assert(AM.Scale == 1 && "Scale with no index!");
+ AM.IndexReg = getRegForValue(V);
+ return AM.IndexReg != 0;
+ }
+ }
+
+ return false;
+}
+
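// A standalone sketch of the memoization pattern used in handleConstantAddresses
// above: the pointer loaded from the GV stub is cached in LocalValueMap so later
// uses in the same basic block reuse the virtual register instead of reloading.
// GlobalValueStub and emitStubLoad are illustrative stand-ins, not LLVM API.
#include <cstdio>
#include <unordered_map>

using Reg = unsigned;
struct GlobalValueStub {};                        // stand-in for llvm::GlobalValue

static Reg NextVReg = 1;
static std::unordered_map<const GlobalValueStub *, Reg> LocalValueMap;

// Pretend to emit the MOV32rm/MOV64rm from the stub and return the new vreg.
static Reg emitStubLoad(const GlobalValueStub *) {
  std::puts("emit load from GV stub");
  return NextVReg++;
}

static Reg getStubPointer(const GlobalValueStub *GV) {
  auto It = LocalValueMap.find(GV);
  if (It != LocalValueMap.end() && It->second != 0)
    return It->second;              // reuse the pointer already loaded in this block
  Reg R = emitStubLoad(GV);         // otherwise issue the load once
  LocalValueMap[GV] = R;            // cache it to prevent reloading in the same MBB
  return R;
}

int main() {
  GlobalValueStub G;
  Reg A = getStubPointer(&G);       // emits the load
  Reg B = getStubPointer(&G);       // no second load, same register
  return A == B ? 0 : 1;
}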
/// X86SelectAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
+ SmallVector<const Value *, 32> GEPs;
+redo_gep:
const User *U = NULL;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(V)) {
@@ -434,13 +561,8 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
Disp += CI->getSExtValue() * S;
break;
}
- if (isa<AddOperator>(Op) &&
- (!isa<Instruction>(Op) ||
- FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
- == FuncInfo.MBB) &&
- isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
- // An add (in the same block) with a constant operand. Fold the
- // constant.
+ if (canFoldAddIntoGEP(U, Op)) {
+ // A compatible add with a constant operand. Fold the constant.
ConstantInt *CI =
cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
Disp += CI->getSExtValue() * S;
@@ -462,139 +584,43 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
goto unsupported_gep;
}
}
+
// Check for displacement overflow.
if (!isInt<32>(Disp))
break;
- // Ok, the GEP indices were covered by constant-offset and scaled-index
- // addressing. Update the address state and move on to examining the base.
+
AM.IndexReg = IndexReg;
AM.Scale = Scale;
AM.Disp = (uint32_t)Disp;
- if (X86SelectAddress(U->getOperand(0), AM))
+ GEPs.push_back(V);
+
+ if (const GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
+ // Ok, the GEP indices were covered by constant-offset and scaled-index
+ // addressing. Update the address state and move on to examining the base.
+ V = GEP;
+ goto redo_gep;
+ } else if (X86SelectAddress(U->getOperand(0), AM)) {
return true;
+ }
// If we couldn't merge the gep value into this addr mode, revert back to
// our address and just match the value instead of completely failing.
AM = SavedAM;
- break;
- unsupported_gep:
- // Ok, the GEP indices weren't all covered.
- break;
- }
- }
-
- // Handle constant address.
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- // Can't handle alternate code models yet.
- if (TM.getCodeModel() != CodeModel::Small)
- return false;
-
- // Can't handle TLS yet.
- if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
- if (GVar->isThreadLocal())
- return false;
- // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how
- // it works...).
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- if (const GlobalVariable *GVar =
- dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)))
- if (GVar->isThreadLocal())
- return false;
-
- // RIP-relative addresses can't have additional register operands, so if
- // we've already folded stuff into the addressing mode, just force the
- // global value into its own register, which we can use as the basereg.
- if (!Subtarget->isPICStyleRIPRel() ||
- (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
- // Okay, we've committed to selecting this global. Set up the address.
- AM.GV = GV;
-
- // Allow the subtarget to classify the global.
- unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
-
- // If this reference is relative to the pic base, set it now.
- if (isGlobalRelativeToPICBase(GVFlags)) {
- // FIXME: How do we know Base.Reg is free??
- AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
- }
-
- // Unless the ABI requires an extra load, return a direct reference to
- // the global.
- if (!isGlobalStubReference(GVFlags)) {
- if (Subtarget->isPICStyleRIPRel()) {
- // Use rip-relative addressing if we can. Above we verified that the
- // base and index registers are unused.
- assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
- AM.Base.Reg = X86::RIP;
- }
- AM.GVOpFlags = GVFlags;
+ for (SmallVectorImpl<const Value *>::reverse_iterator
+ I = GEPs.rbegin(), E = GEPs.rend(); I != E; ++I)
+ if (handleConstantAddresses(*I, AM))
return true;
- }
-
- // Ok, we need to do a load from a stub. If we've already loaded from
- // this stub, reuse the loaded pointer, otherwise emit the load now.
- DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
- unsigned LoadReg;
- if (I != LocalValueMap.end() && I->second != 0) {
- LoadReg = I->second;
- } else {
- // Issue load from stub.
- unsigned Opc = 0;
- const TargetRegisterClass *RC = NULL;
- X86AddressMode StubAM;
- StubAM.Base.Reg = AM.Base.Reg;
- StubAM.GV = GV;
- StubAM.GVOpFlags = GVFlags;
-
- // Prepare for inserting code in the local-value area.
- SavePoint SaveInsertPt = enterLocalValueArea();
-
- if (TLI.getPointerTy() == MVT::i64) {
- Opc = X86::MOV64rm;
- RC = &X86::GR64RegClass;
-
- if (Subtarget->isPICStyleRIPRel())
- StubAM.Base.Reg = X86::RIP;
- } else {
- Opc = X86::MOV32rm;
- RC = &X86::GR32RegClass;
- }
-
- LoadReg = createResultReg(RC);
- MachineInstrBuilder LoadMI =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
- addFullAddress(LoadMI, StubAM);
- // Ok, back to normal mode.
- leaveLocalValueArea(SaveInsertPt);
-
- // Prevent loading GV stub multiple times in same MBB.
- LocalValueMap[V] = LoadReg;
- }
-
- // Now construct the final address. Note that the Disp, Scale,
- // and Index values may already be set here.
- AM.Base.Reg = LoadReg;
- AM.GV = 0;
- return true;
- }
+ return false;
+ unsupported_gep:
+ // Ok, the GEP indices weren't all covered.
+ break;
}
-
- // If all else fails, try to materialize the value in a register.
- if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
- if (AM.Base.Reg == 0) {
- AM.Base.Reg = getRegForValue(V);
- return AM.Base.Reg != 0;
- }
- if (AM.IndexReg == 0) {
- assert(AM.Scale == 1 && "Scale with no index!");
- AM.IndexReg = getRegForValue(V);
- return AM.IndexReg != 0;
- }
}
- return false;
+ return handleConstantAddresses(V, AM);
}
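// A compilable sketch of the control flow introduced above: nested GEPs are
// peeled iteratively via the redo_gep label while their constant offsets are
// folded, each one is remembered, and if the final base value cannot be matched
// the saved GEPs are retried innermost-first. Node, Foldable and handleLeaf are
// hypothetical stand-ins for the LLVM types and helpers.
#include <vector>

struct Node {
  Node *Base = nullptr;    // operand 0 of a GEP, or null for a leaf value
  bool IsGEP = false;
  bool Foldable = false;   // all indices covered by constant-offset/scaled-index
  bool Matchable = false;  // can be turned into an address directly
};

static bool handleLeaf(const Node *N) { return N->Matchable; }

static bool selectAddress(Node *V) {
  std::vector<Node *> GEPs;
  // Peel nested GEPs without recursion (the role of the redo_gep loop).
  while (V->IsGEP && V->Foldable) {
    GEPs.push_back(V);
    V = V->Base;
  }
  if (handleLeaf(V))
    return true;
  // Folding reached a base we cannot match: retry the remembered GEPs
  // themselves, innermost first, before giving up.
  for (auto I = GEPs.rbegin(), E = GEPs.rend(); I != E; ++I)
    if (handleLeaf(*I))
      return true;
  return false;
}

int main() {
  Node Leaf;  Leaf.Matchable = true;
  Node GEP;   GEP.IsGEP = GEP.Foldable = true; GEP.Base = &Leaf;
  return selectAddress(&GEP) ? 0 : 1;   // 0: matched through the leaf base
}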
/// X86SelectCallAddress - Attempt to fill in an address from the given value.
@@ -602,9 +628,35 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
const User *U = NULL;
unsigned Opcode = Instruction::UserOp1;
- if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ const Instruction *I = dyn_cast<Instruction>(V);
+ // Record if the value is defined in the same basic block.
+ //
+ // This information is crucial to know whether or not folding an
+ // operand is valid.
+ // Indeed, FastISel generates or reuses a virtual register for all
+ // operands of all instructions it selects. Obviously, the definition and
+ // its uses must use the same virtual register; otherwise, the produced
+ // code is incorrect.
+ // Before instruction selection, FunctionLoweringInfo::set sets the virtual
+ // registers for values that are alive across basic blocks. This ensures
+ // that the values are consistently set between across basic block, even
+ // if different instruction selection mechanisms are used (e.g., a mix of
+ // SDISel and FastISel).
+ // For values local to a basic block, the instruction selection process
+ // generates these virtual registers with whatever method is appropriate
+ // for its needs. In particular, FastISel and SDISel do not share the way
+ // local virtual registers are set.
+ // Therefore, it is impossible (or at least unsafe) to share values
+ // between basic blocks unless they use the same instruction selection
+ // method, which is not guaranteed for X86.
+ // Moreover, things like hasOneUse could not be used accurately if we
+ // allowed references to values across basic blocks when they are not
+ // initially alive across basic blocks.
+ bool InMBB = true;
+ if (I) {
Opcode = I->getOpcode();
U = I;
+ InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
Opcode = C->getOpcode();
U = C;
@@ -613,18 +665,22 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
switch (Opcode) {
default: break;
case Instruction::BitCast:
- // Look past bitcasts.
- return X86SelectCallAddress(U->getOperand(0), AM);
+ // Look past bitcasts if the operand is in the same BB.
+ if (InMBB)
+ return X86SelectCallAddress(U->getOperand(0), AM);
+ break;
case Instruction::IntToPtr:
- // Look past no-op inttoptrs.
- if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ // Look past no-op inttoptrs if the operand is in the same BB.
+ if (InMBB &&
+ TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
return X86SelectCallAddress(U->getOperand(0), AM);
break;
case Instruction::PtrToInt:
- // Look past no-op ptrtoints.
- if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ // Look past no-op ptrtoints if the operand is in the same BB.
+ if (InMBB &&
+ TLI.getValueType(U->getType()) == TLI.getPointerTy())
return X86SelectCallAddress(U->getOperand(0), AM);
break;
}
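// A minimal sketch of the InMBB guard added above: an operand is only
// looked-through or folded when its defining instruction lives in the block
// being selected, because FastISel and SDISel do not assign virtual registers
// to block-local values the same way. The types below are illustrative
// stand-ins, not LLVM classes.
struct BlockStub {};
struct InstrStub { const BlockStub *Parent; };

// Returns true when folding Def into the current instruction is safe.
static bool canLookThrough(const InstrStub *Def, const BlockStub *CurMBB) {
  if (!Def)
    return true;                 // constants have no defining instruction
  return Def->Parent == CurMBB;  // the InMBB test from X86SelectCallAddress
}

int main() {
  BlockStub BB0, BB1;
  InstrStub Def{&BB0};
  // Safe in the defining block, rejected from another block.
  return (canLookThrough(&Def, &BB0) && !canLookThrough(&Def, &BB1)) ? 0 : 1;
}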
@@ -693,6 +749,10 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
if (S->isAtomic())
return false;
+ unsigned SABIAlignment =
+ TD.getABITypeAlignment(S->getValueOperand()->getType());
+ bool Aligned = S->getAlignment() == 0 || S->getAlignment() >= SABIAlignment;
+
MVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
return false;
@@ -701,7 +761,7 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
if (!X86SelectAddress(I->getOperand(1), AM))
return false;
- return X86FastEmitStore(VT, I->getOperand(0), AM);
+ return X86FastEmitStore(VT, I->getOperand(0), AM, Aligned);
}
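// A standalone illustration of the alignment test computed in X86SelectStore
// above: a store counts as aligned when it carries no explicit alignment
// (0 means "use the ABI default") or its alignment is at least the ABI
// alignment of the stored type. The 16-byte figure is just the usual ABI
// alignment of a 128-bit vector; the function name is illustrative.
#include <cstdint>
#include <iostream>

static bool isAlignedStore(uint64_t StoreAlign, uint64_t ABIAlign) {
  return StoreAlign == 0 || StoreAlign >= ABIAlign;
}

int main() {
  // e.g. storing a <4 x float>, whose ABI alignment is 16 bytes:
  std::cout << isAlignedStore(0, 16)  << "\n";  // 1 -> MOVAPS/VMOVAPS path
  std::cout << isAlignedStore(16, 16) << "\n";  // 1 -> aligned opcode
  std::cout << isAlignedStore(4, 16)  << "\n";  // 0 -> MOVUPS/VMOVUPS path
}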
/// X86SelectRet - Select and emit code to implement ret instructions.
@@ -1006,10 +1066,6 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
}
bool X86FastISel::X86SelectZExt(const Instruction *I) {
- // Handle zero-extension from i1 to i8, which is common.
- if (!I->getOperand(0)->getType()->isIntegerTy(1))
- return false;
-
EVT DstVT = TLI.getValueType(I->getType());
if (!TLI.isTypeLegal(DstVT))
return false;
@@ -1018,12 +1074,37 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
if (ResultReg == 0)
return false;
- // Set the high bits to zero.
- ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
- if (ResultReg == 0)
- return false;
+ // Handle zero-extension from i1 to i8, which is common.
+ MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType());
+ if (SrcVT.SimpleTy == MVT::i1) {
+ // Set the high bits to zero.
+ ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
+ SrcVT = MVT::i8;
+
+ if (ResultReg == 0)
+ return false;
+ }
+
+ if (DstVT == MVT::i64) {
+ // Handle extension to 64-bits via sub-register shenanigans.
+ unsigned MovInst;
+
+ switch (SrcVT.SimpleTy) {
+ case MVT::i8: MovInst = X86::MOVZX32rr8; break;
+ case MVT::i16: MovInst = X86::MOVZX32rr16; break;
+ case MVT::i32: MovInst = X86::MOV32rr; break;
+ default: llvm_unreachable("Unexpected zext to i64 source type");
+ }
+
+ unsigned Result32 = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovInst), Result32)
+ .addReg(ResultReg);
- if (DstVT != MVT::i8) {
+ ResultReg = createResultReg(&X86::GR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::SUBREG_TO_REG),
+ ResultReg)
+ .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
+ } else if (DstVT != MVT::i8) {
ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
ResultReg, /*Kill=*/true);
if (ResultReg == 0)
@@ -1274,8 +1355,8 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
{ &X86::GR16RegClass, X86::AX, X86::DX, {
{ X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
{ X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
- { X86::DIV16r, X86::MOV16r0, Copy, X86::AX, U }, // UDiv
- { X86::DIV16r, X86::MOV16r0, Copy, X86::DX, U }, // URem
+ { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
+ { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
}
}, // i16
{ &X86::GR32RegClass, X86::EAX, X86::EDX, {
@@ -1288,8 +1369,8 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
{ &X86::GR64RegClass, X86::RAX, X86::RDX, {
{ X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
{ X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
- { X86::DIV64r, X86::MOV64r0, Copy, X86::RAX, U }, // UDiv
- { X86::DIV64r, X86::MOV64r0, Copy, X86::RDX, U }, // URem
+ { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
+ { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
}
}, // i64
};
@@ -1335,17 +1416,63 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
if (OpEntry.IsOpSigned)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(OpEntry.OpSignExtend));
- else
+ else {
+ unsigned Zero32 = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(OpEntry.OpSignExtend), TypeEntry.HighInReg);
+ TII.get(X86::MOV32r0), Zero32);
+
+ // Copy the zero into the appropriate sub/super/identical physical
+ // register. Unfortunately the operations needed are not uniform enough to
+ // fit neatly into the table above.
+ if (VT.SimpleTy == MVT::i16) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Copy), TypeEntry.HighInReg)
+ .addReg(Zero32, 0, X86::sub_16bit);
+ } else if (VT.SimpleTy == MVT::i32) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Copy), TypeEntry.HighInReg)
+ .addReg(Zero32);
+ } else if (VT.SimpleTy == MVT::i64) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
+ .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
+ }
+ }
}
// Generate the DIV/IDIV instruction.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
- // Copy output register into result register.
- unsigned ResultReg = createResultReg(TypeEntry.RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(Copy), ResultReg).addReg(OpEntry.DivRemResultReg);
+ // For i8 remainder, we can't reference AH directly, as we'll end
+ // up with bogus copies like %R9B = COPY %AH. Reference AX
+ // instead to prevent AH references in a REX instruction.
+ //
+ // The current assumption of the fast register allocator is that isel
+ // won't generate explicit references to the GPR8_NOREX registers. If
+ // the allocator and/or the backend get enhanced to be more robust in
+ // that regard, this can be, and should be, removed.
+ unsigned ResultReg = 0;
+ if ((I->getOpcode() == Instruction::SRem ||
+ I->getOpcode() == Instruction::URem) &&
+ OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
+ unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
+ unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Copy), SourceSuperReg).addReg(X86::AX);
+
+ // Shift AX right by 8 bits instead of using AH.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SHR16ri),
+ ResultSuperReg).addReg(SourceSuperReg).addImm(8);
+
+ // Now reference the 8-bit subreg of the result.
+ ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
+ /*Kill=*/true, X86::sub_8bit);
+ }
+ // Copy the result out of the physreg if we haven't already.
+ if (!ResultReg) {
+ ResultReg = createResultReg(TypeEntry.RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Copy), ResultReg)
+ .addReg(OpEntry.DivRemResultReg);
+ }
UpdateValueMap(I, ResultReg);
return true;
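// A tiny arithmetic check of the workaround above: on x86-64 an instruction
// that needs a REX prefix cannot encode AH, so instead of copying AH the code
// copies AX, shifts right by 8, and extracts the low byte. The snippet below
// only confirms that this recovers the remainder; it is pure illustration, not
// LLVM API.
#include <cstdint>
#include <iostream>

int main() {
  uint8_t Dividend = 250, Divisor = 3;
  // After an 8-bit DIV, hardware leaves the quotient in AL and remainder in AH:
  uint16_t ax = uint16_t(((Dividend % Divisor) << 8) | (Dividend / Divisor));
  // Extract the remainder without naming AH: shift AX right by 8 and take the
  // low 8 bits -- the same trick as SHR16ri followed by sub_8bit extraction.
  uint8_t Rem = uint8_t(ax >> 8);
  std::cout << unsigned(Rem) << " == " << unsigned(Dividend % Divisor) << "\n";
}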
@@ -1698,8 +1825,6 @@ bool X86FastISel::FastLowerArguments() {
const TargetRegisterClass *RC64 = TLI.getRegClassFor(MVT::i64);
for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I, ++Idx) {
- if (I->use_empty())
- continue;
bool is32Bit = TLI.getValueType(I->getType()) == MVT::i32;
const TargetRegisterClass *RC = is32Bit ? RC32 : RC64;
unsigned SrcReg = is32Bit ? GPR32ArgRegs[Idx] : GPR64ArgRegs[Idx];
@@ -1988,6 +2113,8 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
} else {
unsigned LocMemOffset = VA.getLocMemOffset();
X86AddressMode AM;
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo*>(
+ getTargetMachine()->getRegisterInfo());
AM.Base.Reg = RegInfo->getStackRegister();
AM.Disp = LocMemOffset;
const Value *ArgVal = ArgVals[VA.getValNo()];