author    dim <dim@FreeBSD.org>    2011-06-12 18:01:31 +0000
committer dim <dim@FreeBSD.org>    2011-06-12 18:01:31 +0000
commit    d4c7939beafe09c033866ebd290e274af0cc826d (patch)
tree      a9b264321873e7d25e69b8671c9f705ebc6d30ee /contrib/llvm/lib/Target/X86
parent    b164882ef981a8ed5c085469231831e221fa1323 (diff)
parent    ece02cd5829cea836e9365b0845a8ef042d17b0a (diff)
Upgrade our copy of llvm/clang to r132879, from upstream's trunk.
Diffstat (limited to 'contrib/llvm/lib/Target/X86')
-rw-r--r--  contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp   |   5
-rw-r--r--  contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h     |   1
-rw-r--r--  contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp |   4
-rw-r--r--  contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h   |   1
-rw-r--r--  contrib/llvm/lib/Target/X86/X86.td                              |   4
-rw-r--r--  contrib/llvm/lib/Target/X86/X86FastISel.cpp                     | 485
-rw-r--r--  contrib/llvm/lib/Target/X86/X86FrameLowering.cpp                |   2
-rw-r--r--  contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp                 | 169
-rw-r--r--  contrib/llvm/lib/Target/X86/X86ISelLowering.cpp                 | 347
-rw-r--r--  contrib/llvm/lib/Target/X86/X86ISelLowering.h                   |  31
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrCompiler.td                 | 220
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrExtension.td                |  39
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td            |   3
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrInfo.cpp                    |  84
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrInfo.h                      |   1
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrInfo.td                     |  12
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrMMX.td                      |   2
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrSSE.td                      | 153
-rw-r--r--  contrib/llvm/lib/Target/X86/X86MCAsmInfo.cpp                    |   2
-rw-r--r--  contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp                |   3
-rw-r--r--  contrib/llvm/lib/Target/X86/X86MCInstLower.cpp                  |   4
-rw-r--r--  contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp                 | 109
-rw-r--r--  contrib/llvm/lib/Target/X86/X86RegisterInfo.h                   |   4
-rw-r--r--  contrib/llvm/lib/Target/X86/X86RegisterInfo.td                  | 487
-rw-r--r--  contrib/llvm/lib/Target/X86/X86Subtarget.cpp                    |   1
25 files changed, 1170 insertions, 1003 deletions
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index dd6e353..68247d2 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -41,6 +41,11 @@ X86ATTInstPrinter::X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
&TM.getSubtarget<X86Subtarget>()));
}
+void X86ATTInstPrinter::printRegName(raw_ostream &OS,
+ unsigned RegNo) const {
+ OS << '%' << getRegisterName(RegNo);
+}
+
void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
// Try to print any aliases first.
if (!printAliasInstr(MI, OS))
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index 8d69391..5f939b6 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -26,6 +26,7 @@ class X86ATTInstPrinter : public MCInstPrinter {
public:
X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI);
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &OS);
virtual StringRef getOpcodeName(unsigned Opcode) const;
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 47253eb..5f581ba 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -29,6 +29,10 @@ using namespace llvm;
#define GET_INSTRUCTION_NAME
#include "X86GenAsmWriter1.inc"
+void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
printInstruction(MI, OS);
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index ca99dc0..c8030c3 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -27,6 +27,7 @@ public:
X86IntelInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
: MCInstPrinter(MAI) {}
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &OS);
virtual StringRef getOpcodeName(unsigned Opcode) const;
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
index 25b8d3e..7bb9676 100644
--- a/contrib/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -101,8 +101,10 @@ def : Proc<"i686", []>;
def : Proc<"pentiumpro", [FeatureCMOV]>;
def : Proc<"pentium2", [FeatureMMX, FeatureCMOV]>;
def : Proc<"pentium3", [FeatureSSE1]>;
+def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>;
def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"pentium4", [FeatureSSE2]>;
+def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
@@ -122,7 +124,7 @@ def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// rather than a superset.
// FIXME: Disabling AVX for now since it's not ready.
-def : Proc<"sandybridge", [FeatureSSE42, Feature64Bit,
+def : Proc<"corei7-avx", [FeatureSSE42, Feature64Bit,
FeatureAES, FeatureCLMUL]>;
def : Proc<"k6", [FeatureMMX]>;
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index 1382f18..f1b9972 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -108,11 +108,11 @@ private:
bool X86SelectFPExt(const Instruction *I);
bool X86SelectFPTrunc(const Instruction *I);
- bool X86SelectExtractValue(const Instruction *I);
-
bool X86VisitIntrinsicCall(const IntrinsicInst &I);
bool X86SelectCall(const Instruction *I);
+ bool DoSelectCall(const Instruction *I, const char *MemIntName);
+
const X86InstrInfo *getInstrInfo() const {
return getTargetMachine()->getInstrInfo();
}
@@ -135,6 +135,8 @@ private:
bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
+ bool IsMemcpySmall(uint64_t Len);
+
bool TryEmitSmallMemcpy(X86AddressMode DestAM,
X86AddressMode SrcAM, uint64_t Len);
};
@@ -401,7 +403,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
continue;
}
-
+
// A array/variable index is always of the form i*S where S is the
// constant scale size. See if we can push the scale into immediates.
uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
@@ -469,7 +471,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
if (GVar->isThreadLocal())
return false;
-
+
// RIP-relative addresses can't have additional register operands, so if
// we've already folded stuff into the addressing mode, just force the
// global value into its own register, which we can use as the basereg.
@@ -704,7 +706,8 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
- CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
+ I->getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
const Value *RV = Ret->getOperand(0);
@@ -724,18 +727,38 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// Only handle register returns for now.
if (!VA.isRegLoc())
return false;
- // TODO: For now, don't try to handle cases where getLocInfo()
- // says Full but the types don't match.
- if (TLI.getValueType(RV->getType()) != VA.getValVT())
- return false;
// The calling-convention tables for x87 returns don't tell
// the whole story.
if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
return false;
- // Make the copy.
unsigned SrcReg = Reg + VA.getValNo();
+ EVT SrcVT = TLI.getValueType(RV->getType());
+ EVT DstVT = VA.getValVT();
+ // Special handling for extended integers.
+ if (SrcVT != DstVT) {
+ if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
+ return false;
+
+ if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
+ return false;
+
+ assert(DstVT == MVT::i32 && "X86 should always ext to i32");
+
+ if (SrcVT == MVT::i1) {
+ if (Outs[0].Flags.isSExt())
+ return false;
+ SrcReg = FastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
+ SrcVT = MVT::i8;
+ }
+ unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
+ ISD::SIGN_EXTEND;
+ SrcReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
+ SrcReg, /*TODO: Kill=*/false);
+ }
+
+ // Make the copy.
unsigned DstReg = VA.getLocReg();
const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
// Avoid a cross-class copy. This is very unlikely.
@@ -916,18 +939,31 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
bool X86FastISel::X86SelectZExt(const Instruction *I) {
// Handle zero-extension from i1 to i8, which is common.
- if (I->getType()->isIntegerTy(8) &&
- I->getOperand(0)->getType()->isIntegerTy(1)) {
- unsigned ResultReg = getRegForValue(I->getOperand(0));
- if (ResultReg == 0) return false;
- // Set the high bits to zero.
- ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
- if (ResultReg == 0) return false;
- UpdateValueMap(I, ResultReg);
- return true;
+ if (!I->getOperand(0)->getType()->isIntegerTy(1))
+ return false;
+
+ EVT DstVT = TLI.getValueType(I->getType());
+ if (!TLI.isTypeLegal(DstVT))
+ return false;
+
+ unsigned ResultReg = getRegForValue(I->getOperand(0));
+ if (ResultReg == 0)
+ return false;
+
+ // Set the high bits to zero.
+ ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
+ if (ResultReg == 0)
+ return false;
+
+ if (DstVT != MVT::i8) {
+ ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
+ ResultReg, /*Kill=*/true);
+ if (ResultReg == 0)
+ return false;
}
- return false;
+ UpdateValueMap(I, ResultReg);
+ return true;
}
@@ -1010,63 +1046,6 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
FuncInfo.MBB->addSuccessor(TrueMBB);
return true;
}
- } else if (ExtractValueInst *EI =
- dyn_cast<ExtractValueInst>(BI->getCondition())) {
- // Check to see if the branch instruction is from an "arithmetic with
- // overflow" intrinsic. The main way these intrinsics are used is:
- //
- // %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
- // %sum = extractvalue { i32, i1 } %t, 0
- // %obit = extractvalue { i32, i1 } %t, 1
- // br i1 %obit, label %overflow, label %normal
- //
- // The %sum and %obit are converted in an ADD and a SETO/SETB before
- // reaching the branch. Therefore, we search backwards through the MBB
- // looking for the SETO/SETB instruction. If an instruction modifies the
- // EFLAGS register before we reach the SETO/SETB instruction, then we can't
- // convert the branch into a JO/JB instruction.
- if (const IntrinsicInst *CI =
- dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){
- if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
- CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
- const MachineInstr *SetMI = 0;
- unsigned Reg = getRegForValue(EI);
-
- for (MachineBasicBlock::const_reverse_iterator
- RI = FuncInfo.MBB->rbegin(), RE = FuncInfo.MBB->rend();
- RI != RE; ++RI) {
- const MachineInstr &MI = *RI;
-
- if (MI.definesRegister(Reg)) {
- if (MI.isCopy()) {
- Reg = MI.getOperand(1).getReg();
- continue;
- }
-
- SetMI = &MI;
- break;
- }
-
- const TargetInstrDesc &TID = MI.getDesc();
- if (TID.hasImplicitDefOfPhysReg(X86::EFLAGS) ||
- MI.hasUnmodeledSideEffects())
- break;
- }
-
- if (SetMI) {
- unsigned OpCode = SetMI->getOpcode();
-
- if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(OpCode == X86::SETOr ? X86::JO_4 : X86::JB_4))
- .addMBB(TrueMBB);
- FastEmitBranch(FalseMBB, DL);
- FuncInfo.MBB->addSuccessor(TrueMBB);
- return true;
- }
- }
- }
- }
} else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
// Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
// typically happen for _Bool and C++ bools.
@@ -1086,13 +1065,13 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
if (OpReg == 0) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TestOpc))
.addReg(OpReg).addImm(1);
-
+
unsigned JmpOpc = X86::JNE_4;
if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
std::swap(TrueMBB, FalseMBB);
JmpOpc = X86::JE_4;
}
-
+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(JmpOpc))
.addMBB(TrueMBB);
FastEmitBranch(FalseMBB, DL);
@@ -1266,18 +1245,13 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
}
bool X86FastISel::X86SelectTrunc(const Instruction *I) {
- if (Subtarget->is64Bit())
- // All other cases should be handled by the tblgen generated code.
- return false;
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(I->getType());
- // This code only handles truncation to byte right now.
+ // This code only handles truncation to byte.
if (DstVT != MVT::i8 && DstVT != MVT::i1)
- // All other cases should be handled by the tblgen generated code.
return false;
- if (SrcVT != MVT::i16 && SrcVT != MVT::i32)
- // All other cases should be handled by the tblgen generated code.
+ if (!TLI.isTypeLegal(SrcVT))
return false;
unsigned InputReg = getRegForValue(I->getOperand(0));
@@ -1285,16 +1259,26 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
// Unhandled operand. Halt "fast" selection and bail.
return false;
- // First issue a copy to GR16_ABCD or GR32_ABCD.
- const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
- ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
- unsigned CopyReg = createResultReg(CopyRC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- CopyReg).addReg(InputReg);
+ if (SrcVT == MVT::i8) {
+ // Truncate from i8 to i1; no code needed.
+ UpdateValueMap(I, InputReg);
+ return true;
+ }
+
+ if (!Subtarget->is64Bit()) {
+ // If we're on x86-32, we can't extract an i8 from a general register.
+ // First issue a copy to GR16_ABCD or GR32_ABCD.
+ const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
+ ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
+ unsigned CopyReg = createResultReg(CopyRC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CopyReg).addReg(InputReg);
+ InputReg = CopyReg;
+ }
- // Then issue an extract_subreg.
+ // Issue an extract_subreg.
unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
- CopyReg, /*Kill=*/true,
+ InputReg, /*Kill=*/true,
X86::sub_8bit);
if (!ResultReg)
return false;
@@ -1303,36 +1287,18 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
return true;
}
-bool X86FastISel::X86SelectExtractValue(const Instruction *I) {
- const ExtractValueInst *EI = cast<ExtractValueInst>(I);
- const Value *Agg = EI->getAggregateOperand();
-
- if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) {
- switch (CI->getIntrinsicID()) {
- default: break;
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow: {
- // Cheat a little. We know that the registers for "add" and "seto" are
- // allocated sequentially. However, we only keep track of the register
- // for "add" in the value map. Use extractvalue's index to get the
- // correct register for "seto".
- unsigned OpReg = getRegForValue(Agg);
- if (OpReg == 0)
- return false;
- UpdateValueMap(I, OpReg + *EI->idx_begin());
- return true;
- }
- }
- }
-
- return false;
+bool X86FastISel::IsMemcpySmall(uint64_t Len) {
+ return Len <= (Subtarget->is64Bit() ? 32 : 16);
}
bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
X86AddressMode SrcAM, uint64_t Len) {
+
// Make sure we don't bloat code by inlining very large memcpy's.
- bool i64Legal = TLI.isTypeLegal(MVT::i64);
- if (Len > (i64Legal ? 32 : 16)) return false;
+ if (!IsMemcpySmall(Len))
+ return false;
+
+ bool i64Legal = Subtarget->is64Bit();
// We don't care about alignment here since we just emit integer accesses.
while (Len) {
@@ -1369,20 +1335,44 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
case Intrinsic::memcpy: {
const MemCpyInst &MCI = cast<MemCpyInst>(I);
// Don't handle volatile or variable length memcpys.
- if (MCI.isVolatile() || !isa<ConstantInt>(MCI.getLength()))
+ if (MCI.isVolatile())
+ return false;
+
+ if (isa<ConstantInt>(MCI.getLength())) {
+ // Small memcpy's are common enough that we want to do them
+ // without a call if possible.
+ uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
+ if (IsMemcpySmall(Len)) {
+ X86AddressMode DestAM, SrcAM;
+ if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
+ !X86SelectAddress(MCI.getRawSource(), SrcAM))
+ return false;
+ TryEmitSmallMemcpy(DestAM, SrcAM, Len);
+ return true;
+ }
+ }
+
+ unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
+ if (!MCI.getLength()->getType()->isIntegerTy(SizeWidth))
return false;
- uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
-
- // Get the address of the dest and source addresses.
- X86AddressMode DestAM, SrcAM;
- if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
- !X86SelectAddress(MCI.getRawSource(), SrcAM))
+ if (MCI.getSourceAddressSpace() > 255 || MCI.getDestAddressSpace() > 255)
return false;
- return TryEmitSmallMemcpy(DestAM, SrcAM, Len);
+ return DoSelectCall(&I, "memcpy");
+ }
+ case Intrinsic::memset: {
+ const MemSetInst &MSI = cast<MemSetInst>(I);
+
+ unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
+ if (!MSI.getLength()->getType()->isIntegerTy(SizeWidth))
+ return false;
+
+ if (MSI.getDestAddressSpace() > 255)
+ return false;
+
+ return DoSelectCall(&I, "memset");
}
-
case Intrinsic::stackprotector: {
// Emit code inline code to store the stack guard onto the stack.
EVT PtrTy = TLI.getPointerTy();
@@ -1396,29 +1386,6 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
return true;
}
- case Intrinsic::objectsize: {
- // FIXME: This should be moved to generic code!
- ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
- const Type *Ty = I.getCalledFunction()->getReturnType();
-
- MVT VT;
- if (!isTypeLegal(Ty, VT))
- return false;
-
- unsigned OpC = 0;
- if (VT == MVT::i32)
- OpC = X86::MOV32ri;
- else if (VT == MVT::i64)
- OpC = X86::MOV64ri;
- else
- return false;
-
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg).
- addImm(CI->isZero() ? -1ULL : 0);
- UpdateValueMap(&I, ResultReg);
- return true;
- }
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I);
X86AddressMode AM;
@@ -1439,12 +1406,9 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow: {
// FIXME: Should fold immediates.
-
+
// Replace "add with overflow" intrinsics with an "add" instruction followed
- // by a seto/setc instruction. Later on, when the "extractvalue"
- // instructions are encountered, we use the fact that two registers were
- // created sequentially to get the correct registers for the "sum" and the
- // "overflow bit".
+ // by a seto/setc instruction.
const Function *Callee = I.getCalledFunction();
const Type *RetTy =
cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
@@ -1470,27 +1434,18 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
else
return false;
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ // The call to CreateRegs builds two sequential registers, to store
+ // both of the returned values.
+ unsigned ResultReg = FuncInfo.CreateRegs(I.getType());
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg)
.addReg(Reg1).addReg(Reg2);
- unsigned DestReg1 = UpdateValueMap(&I, ResultReg);
-
- // If the add with overflow is an intra-block value then we just want to
- // create temporaries for it like normal. If it is a cross-block value then
- // UpdateValueMap will return the cross-block register used. Since we
- // *really* want the value to be live in the register pair known by
- // UpdateValueMap, we have to use DestReg1+1 as the destination register in
- // the cross block case. In the non-cross-block case, we should just make
- // another register for the value.
- if (DestReg1 != ResultReg)
- ResultReg = DestReg1+1;
- else
- ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8));
unsigned Opc = X86::SETBr;
if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
Opc = X86::SETOr;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg+1);
+
+ UpdateValueMap(&I, ResultReg, 2);
return true;
}
}
@@ -1508,6 +1463,14 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
return X86VisitIntrinsicCall(*II);
+ return DoSelectCall(I, 0);
+}
+
+// Select either a call, or an llvm.memcpy/memmove/memset intrinsic
+bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
// Handle only C and fastcc calling conventions for now.
ImmutableCallSite CS(CI);
CallingConv::ID CC = CS.getCallingConv();
@@ -1533,12 +1496,15 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (Subtarget->IsCalleePop(isVarArg, CC))
return false;
- // Handle *simple* calls for now.
- const Type *RetTy = CS.getType();
- MVT RetVT;
- if (RetTy->isVoidTy())
- RetVT = MVT::isVoid;
- else if (!isTypeLegal(RetTy, RetVT, true))
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ SmallVector<uint64_t, 4> Offsets;
+ GetReturnInfo(I->getType(), CS.getAttributes().getRetAttributes(),
+ Outs, TLI, &Offsets);
+ bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
+ *FuncInfo.MF, FTy->isVarArg(),
+ Outs, FTy->getContext());
+ if (!CanLowerReturn)
return false;
// Materialize callee address in a register. FIXME: GV address can be
@@ -1555,13 +1521,6 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
} else
return false;
- // Allow calls which produce i1 results.
- bool AndToI1 = false;
- if (RetVT == MVT::i1) {
- RetVT = MVT::i8;
- AndToI1 = true;
- }
-
// Deal with call operands first.
SmallVector<const Value *, 8> ArgVals;
SmallVector<unsigned, 8> Args;
@@ -1573,6 +1532,10 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
ArgFlags.reserve(CS.arg_size());
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
+ // If we're lowering a mem intrinsic instead of a regular call, skip the
+ // last two arguments, which should not be passed to the underlying function.
+ if (MemIntName && e-i <= 2)
+ break;
Value *ArgVal = *i;
ISD::ArgFlagsTy Flags;
unsigned AttrInd = i - CS.arg_begin() + 1;
@@ -1581,6 +1544,25 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
Flags.setZExt();
+ if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
+ const PointerType *Ty = cast<PointerType>(ArgVal->getType());
+ const Type *ElementTy = Ty->getElementType();
+ unsigned FrameSize = TD.getTypeAllocSize(ElementTy);
+ unsigned FrameAlign = CS.getParamAlignment(AttrInd);
+ if (!FrameAlign)
+ FrameAlign = TLI.getByValTypeAlignment(ElementTy);
+ Flags.setByVal();
+ Flags.setByValSize(FrameSize);
+ Flags.setByValAlign(FrameAlign);
+ if (!IsMemcpySmall(FrameSize))
+ return false;
+ }
+
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg))
+ Flags.setInReg();
+ if (CS.paramHasAttr(AttrInd, Attribute::Nest))
+ Flags.setNest();
+
// If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
// instruction. This is safe because it is common to all fastisel supported
// calling conventions on x86.
@@ -1593,9 +1575,9 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
ArgVal = ConstantExpr::getZExt(CI,Type::getInt32Ty(CI->getContext()));
}
}
-
+
unsigned ArgReg;
-
+
// Passing bools around ends up doing a trunc to i1 and passing it.
// Codegen this as an argument + "and 1".
if (ArgVal->getType()->isIntegerTy(1) && isa<TruncInst>(ArgVal) &&
@@ -1604,10 +1586,10 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
ArgVal = cast<TruncInst>(ArgVal)->getOperand(0);
ArgReg = getRegForValue(ArgVal);
if (ArgReg == 0) return false;
-
+
MVT ArgVT;
if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false;
-
+
ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg,
ArgVal->hasOneUse(), 1);
} else {
@@ -1616,16 +1598,12 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (ArgReg == 0) return false;
- // FIXME: Only handle *easy* calls for now.
- if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
- CS.paramHasAttr(AttrInd, Attribute::Nest) ||
- CS.paramHasAttr(AttrInd, Attribute::ByVal))
- return false;
-
const Type *ArgTy = ArgVal->getType();
MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT))
return false;
+ if (ArgVT == MVT::x86mmx)
+ return false;
unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
Flags.setOrigAlign(OriginalAlignment);
@@ -1637,7 +1615,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, TM, ArgLocs, I->getParent()->getContext());
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs,
+ I->getParent()->getContext());
// Allocate shadow area for Win64
if (Subtarget->isTargetWin64())
@@ -1666,6 +1645,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
@@ -1673,6 +1654,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
break;
}
case CCValAssign::ZExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
@@ -1680,9 +1663,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
break;
}
case CCValAssign::AExt: {
- // We don't handle MMX parameters yet.
- if (VA.getLocVT().isVector() && VA.getLocVT().getSizeInBits() == 128)
- return false;
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
if (!Emitted)
@@ -1716,14 +1698,21 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
AM.Base.Reg = StackPtr;
AM.Disp = LocMemOffset;
const Value *ArgVal = ArgVals[VA.getValNo()];
-
- // If this is a really simple value, emit this with the Value* version of
- // X86FastEmitStore. If it isn't simple, we don't want to do this, as it
- // can cause us to reevaluate the argument.
- if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal))
+ ISD::ArgFlagsTy Flags = ArgFlags[VA.getValNo()];
+
+ if (Flags.isByVal()) {
+ X86AddressMode SrcAM;
+ SrcAM.Base.Reg = Arg;
+ bool Res = TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize());
+ assert(Res && "memcpy length already checked!"); (void)Res;
+ } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
+ // If this is a really simple value, emit this with the Value* version
+ // of X86FastEmitStore. If it isn't simple, we don't want to do this,
+ // as it can cause us to reevaluate the argument.
X86FastEmitStore(ArgVT, ArgVal, AM);
- else
+ } else {
X86FastEmitStore(ArgVT, Arg, AM);
+ }
}
}
@@ -1793,8 +1782,11 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
}
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
- .addGlobalAddress(GV, 0, OpFlags);
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc));
+ if (MemIntName)
+ MIB.addExternalSymbol(MemIntName, OpFlags);
+ else
+ MIB.addGlobalAddress(GV, 0, OpFlags);
}
// Add an implicit use GOT pointer in EBX.
@@ -1816,63 +1808,74 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
.addImm(NumBytes).addImm(NumBytesCallee);
- // Now handle call return value (if any).
- SmallVector<unsigned, 4> UsedRegs;
- if (RetVT != MVT::isVoid) {
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
- CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);
+ // Build info for return calling conv lowering code.
+ // FIXME: This is practically a copy-paste from TargetLowering::LowerCallTo.
+ SmallVector<ISD::InputArg, 32> Ins;
+ SmallVector<EVT, 4> RetTys;
+ ComputeValueVTs(TLI, I->getType(), RetTys);
+ for (unsigned i = 0, e = RetTys.size(); i != e; ++i) {
+ EVT VT = RetTys[i];
+ EVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT);
+ for (unsigned j = 0; j != NumRegs; ++j) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT.getSimpleVT();
+ MyFlags.Used = !CS.getInstruction()->use_empty();
+ if (CS.paramHasAttr(0, Attribute::SExt))
+ MyFlags.Flags.setSExt();
+ if (CS.paramHasAttr(0, Attribute::ZExt))
+ MyFlags.Flags.setZExt();
+ if (CS.paramHasAttr(0, Attribute::InReg))
+ MyFlags.Flags.setInReg();
+ Ins.push_back(MyFlags);
+ }
+ }
- // Copy all of the result registers out of their specified physreg.
- assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
- EVT CopyVT = RVLocs[0].getValVT();
- TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
+ // Now handle call return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CC, false, *FuncInfo.MF, TM, RVLocs,
+ I->getParent()->getContext());
+ unsigned ResultReg = FuncInfo.CreateRegs(I->getType());
+ CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ EVT CopyVT = RVLocs[i].getValVT();
+ unsigned CopyReg = ResultReg + i;
// If this is a call to a function that returns an fp value on the x87 fp
// stack, but where we prefer to use the value in xmm registers, copy it
// out as F80 and use a truncate to move it from fp stack reg to xmm reg.
- if ((RVLocs[0].getLocReg() == X86::ST0 ||
- RVLocs[0].getLocReg() == X86::ST1) &&
+ if ((RVLocs[i].getLocReg() == X86::ST0 ||
+ RVLocs[i].getLocReg() == X86::ST1) &&
isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
CopyVT = MVT::f80;
- DstRC = X86::RFP80RegisterClass;
+ CopyReg = createResultReg(X86::RFP80RegisterClass);
}
- unsigned ResultReg = createResultReg(DstRC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(RVLocs[0].getLocReg());
- UsedRegs.push_back(RVLocs[0].getLocReg());
+ CopyReg).addReg(RVLocs[i].getLocReg());
+ UsedRegs.push_back(RVLocs[i].getLocReg());
- if (CopyVT != RVLocs[0].getValVT()) {
+ if (CopyVT != RVLocs[i].getValVT()) {
// Round the F80 the right size, which also moves to the appropriate xmm
// register. This is accomplished by storing the F80 value in memory and
// then loading it back. Ewww...
- EVT ResVT = RVLocs[0].getValVT();
+ EVT ResVT = RVLocs[i].getValVT();
unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
unsigned MemSize = ResVT.getSizeInBits()/8;
int FI = MFI.CreateStackObject(MemSize, MemSize, false);
addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc)), FI)
- .addReg(ResultReg);
- DstRC = ResVT == MVT::f32
- ? X86::FR32RegisterClass : X86::FR64RegisterClass;
+ .addReg(CopyReg);
Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
- ResultReg = createResultReg(DstRC);
addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(Opc), ResultReg), FI);
+ TII.get(Opc), ResultReg + i), FI);
}
-
- if (AndToI1) {
- // Mask out all but lowest bit for some call which produces an i1.
- unsigned AndResult = createResultReg(X86::GR8RegisterClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
- ResultReg = AndResult;
- }
-
- UpdateValueMap(I, ResultReg);
}
+ if (RVLocs.size())
+ UpdateValueMap(I, ResultReg, RVLocs.size());
+
// Set all unused physreg defs as dead.
static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
@@ -1911,8 +1914,6 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) {
return X86SelectFPExt(I);
case Instruction::FPTrunc:
return X86SelectFPTrunc(I);
- case Instruction::ExtractValue:
- return X86SelectExtractValue(I);
case Instruction::IntToPtr: // Deliberate fall-through.
case Instruction::PtrToInt: {
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
@@ -1990,7 +1991,7 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
if (AM.BaseType == X86AddressMode::RegBase &&
AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == 0)
return AM.Base.Reg;
-
+
Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
unsigned ResultReg = createResultReg(RC);
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
index 06d12fc..2e95300 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -355,7 +355,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineModuleInfo &MMI = MF.getMMI();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
bool needsFrameMoves = MMI.hasDebugInfo() ||
- !Fn->doesNotThrow() || UnwindTablesMandatory;
+ Fn->needsUnwindTableEntry();
uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
bool HasFP = hasFP(MF);
diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 4534e85..1fcc274 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -189,6 +189,7 @@ namespace {
SDNode *Select(SDNode *N);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
+ SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
@@ -1329,6 +1330,8 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
return ResNode;
}
+// FIXME: Figure out some way to unify this with the 'or' and other code
+// below.
SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
if (Node->hasAnyUseOfValue(0))
return 0;
@@ -1479,6 +1482,158 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
}
}
+enum AtomicOpc {
+ OR,
+ AND,
+ XOR,
+ AtomicOpcEnd
+};
+
+enum AtomicSz {
+ ConstantI8,
+ I8,
+ SextConstantI16,
+ ConstantI16,
+ I16,
+ SextConstantI32,
+ ConstantI32,
+ I32,
+ SextConstantI64,
+ ConstantI64,
+ I64,
+ AtomicSzEnd
+};
+
+static const unsigned int AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
+ {
+ X86::LOCK_OR8mi,
+ X86::LOCK_OR8mr,
+ X86::LOCK_OR16mi8,
+ X86::LOCK_OR16mi,
+ X86::LOCK_OR16mr,
+ X86::LOCK_OR32mi8,
+ X86::LOCK_OR32mi,
+ X86::LOCK_OR32mr,
+ X86::LOCK_OR64mi8,
+ X86::LOCK_OR64mi32,
+ X86::LOCK_OR64mr
+ },
+ {
+ X86::LOCK_AND8mi,
+ X86::LOCK_AND8mr,
+ X86::LOCK_AND16mi8,
+ X86::LOCK_AND16mi,
+ X86::LOCK_AND16mr,
+ X86::LOCK_AND32mi8,
+ X86::LOCK_AND32mi,
+ X86::LOCK_AND32mr,
+ X86::LOCK_AND64mi8,
+ X86::LOCK_AND64mi32,
+ X86::LOCK_AND64mr
+ },
+ {
+ X86::LOCK_XOR8mi,
+ X86::LOCK_XOR8mr,
+ X86::LOCK_XOR16mi8,
+ X86::LOCK_XOR16mi,
+ X86::LOCK_XOR16mr,
+ X86::LOCK_XOR32mi8,
+ X86::LOCK_XOR32mi,
+ X86::LOCK_XOR32mr,
+ X86::LOCK_XOR64mi8,
+ X86::LOCK_XOR64mi32,
+ X86::LOCK_XOR64mr
+ }
+};
+
+SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
+ if (Node->hasAnyUseOfValue(0))
+ return 0;
+
+ // Optimize common patterns for __sync_or_and_fetch and similar arith
+ // operations where the result is not used. This allows us to use the "lock"
+ // version of the arithmetic instruction.
+ // FIXME: Same as for 'add' and 'sub', try to merge those down here.
+ SDValue Chain = Node->getOperand(0);
+ SDValue Ptr = Node->getOperand(1);
+ SDValue Val = Node->getOperand(2);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ return 0;
+
+ // Which index into the table.
+ enum AtomicOpc Op;
+ switch (Node->getOpcode()) {
+ case ISD::ATOMIC_LOAD_OR:
+ Op = OR;
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ Op = AND;
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ Op = XOR;
+ break;
+ default:
+ return 0;
+ }
+
+ bool isCN = false;
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
+ if (CN) {
+ isCN = true;
+ Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT);
+ }
+
+ unsigned Opc = 0;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: return 0;
+ case MVT::i8:
+ if (isCN)
+ Opc = AtomicOpcTbl[Op][ConstantI8];
+ else
+ Opc = AtomicOpcTbl[Op][I8];
+ break;
+ case MVT::i16:
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][SextConstantI16];
+ else
+ Opc = AtomicOpcTbl[Op][ConstantI16];
+ } else
+ Opc = AtomicOpcTbl[Op][I16];
+ break;
+ case MVT::i32:
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][SextConstantI32];
+ else
+ Opc = AtomicOpcTbl[Op][ConstantI32];
+ } else
+ Opc = AtomicOpcTbl[Op][I32];
+ break;
+ case MVT::i64:
+ if (isCN) {
+ if (immSext8(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][SextConstantI64];
+ else if (i64immSExt32(Val.getNode()))
+ Opc = AtomicOpcTbl[Op][ConstantI64];
+ } else
+ Opc = AtomicOpcTbl[Op][I64];
+ break;
+ }
+
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, NVT), 0);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
+ SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ SDValue RetVals[] = { Undef, Ret };
+ return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+}
+
/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
/// any uses which require the SF or OF bits to be accurate.
static bool HasNoSignedComparisonUses(SDNode *N) {
@@ -1580,6 +1735,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return RetVal;
break;
}
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR: {
+ SDNode *RetVal = SelectAtomicLoadArith(Node, NVT);
+ if (RetVal)
+ return RetVal;
+ break;
+ }
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
@@ -1843,17 +2006,17 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
Move =
- SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16,
+ SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
MVT::Other, Ops,
array_lengthof(Ops)), 0);
Chain = Move.getValue(1);
ReplaceUses(N0.getValue(1), Chain);
} else {
Move =
- SDValue(CurDAG->getMachineNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
+ SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
Chain = CurDAG->getEntryNode();
}
- Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
InFlag = Chain.getValue(1);
} else {
InFlag =
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index 703c01d..294a6a7 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -222,7 +222,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// X86 is weird, it always uses i8 for shift amounts and setcc results.
setBooleanContents(ZeroOrOneBooleanContent);
-
+
// For 64-bit since we have so many registers use the ILP scheduler, for
// 32-bit code use the register pressure specific scheduling.
if (Subtarget->is64Bit())
@@ -574,6 +574,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ // Lower this to FGETSIGNx86 plus an AND.
+ setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
+ setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
+
// We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
@@ -927,7 +931,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Can turn SHL into an integer multiply.
setOperationAction(ISD::SHL, MVT::v4i32, Custom);
setOperationAction(ISD::SHL, MVT::v16i8, Custom);
- setOperationAction(ISD::SRL, MVT::v4i32, Legal);
// i8 and i16 vectors are custom , because the source register and source
// source memory operand types are not the same width. f32 vectors are
@@ -949,6 +952,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
}
+ if (Subtarget->hasSSE2()) {
+ setOperationAction(ISD::SRL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SRL, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::SHL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v8i16, Custom);
+
+ setOperationAction(ISD::SRA, MVT::v4i32, Custom);
+ setOperationAction(ISD::SRA, MVT::v8i16, Custom);
+ }
+
if (Subtarget->hasSSE42())
setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
@@ -1081,6 +1097,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::SINT_TO_FP);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
@@ -1096,6 +1113,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
setPrefLoopAlignment(16);
benefitFromCodePlacementOpt = true;
+
+ setPrefFunctionAlignment(4);
}
@@ -1247,11 +1266,6 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
}
-/// getFunctionAlignment - Return the Log2 alignment of this function.
-unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
- return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4;
-}
-
// FIXME: Why this routine is here? Move to RegInfo!
std::pair<const TargetRegisterClass*, uint8_t>
X86TargetLowering::findRepresentativeClass(EVT VT) const{
@@ -1306,11 +1320,12 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
#include "X86GenCallingConv.inc"
bool
-X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
+X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_X86);
}
@@ -1325,7 +1340,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
@@ -1476,8 +1491,8 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
bool Is64Bit = Subtarget->is64Bit();
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
- RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
@@ -1518,20 +1533,6 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
// This truncation won't change the value.
DAG.getIntPtrConstant(1));
- } else if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) {
- // For x86-64, MMX values are returned in XMM0 / XMM1 except for v1i64.
- if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
- Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
- MVT::v2i64, InFlag).getValue(1);
- Val = Chain.getValue(0);
- Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
- Val, DAG.getConstant(0, MVT::i64));
- } else {
- Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
- MVT::i64, InFlag).getValue(1);
- Val = Chain.getValue(0);
- }
- Val = DAG.getNode(ISD::BITCAST, dl, CopyVT, Val);
} else {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
CopyVT, InFlag).getValue(1);
@@ -1680,7 +1681,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
@@ -2007,7 +2008,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
@@ -2530,16 +2531,30 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (RegInfo->needsStackRealignment(MF))
return false;
- // Do not sibcall optimize vararg calls unless the call site is not passing
- // any arguments.
- if (isVarArg && !Outs.empty())
- return false;
-
// Also avoid sibcall optimization if either caller or callee uses struct
// return semantics.
if (isCalleeStructRet || isCallerStructRet)
return false;
+ // Do not sibcall optimize vararg calls unless all arguments are passed via
+ // registers.
+ if (isVarArg && !Outs.empty()) {
+
+ // Optimizing for varargs on Win64 is unlikely to be safe without
+ // additional testing.
+ if (Subtarget->isTargetWin64())
+ return false;
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
+ if (!ArgLocs[i].isRegLoc())
+ return false;
+ }
+
// If the call result is in ST0 / ST1, it needs to be popped off the x87 stack.
// Therefore if it's not used by the call it is not safe to optimize this into
// a sibcall.
@@ -2552,8 +2567,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
if (Unused) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CalleeCC, false, getTargetMachine(),
- RVLocs, *DAG.getContext());
+ CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
@@ -2566,13 +2581,13 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// results are returned in the same way as what the caller expects.
if (!CCMatch) {
SmallVector<CCValAssign, 16> RVLocs1;
- CCState CCInfo1(CalleeCC, false, getTargetMachine(),
- RVLocs1, *DAG.getContext());
+ CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs1, *DAG.getContext());
CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
SmallVector<CCValAssign, 16> RVLocs2;
- CCState CCInfo2(CallerCC, false, getTargetMachine(),
- RVLocs2, *DAG.getContext());
+ CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs2, *DAG.getContext());
CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
if (RVLocs1.size() != RVLocs2.size())
@@ -2598,8 +2613,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
- ArgLocs, *DAG.getContext());
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
if (Subtarget->isTargetWin64()) {
@@ -6619,9 +6634,9 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
}
-/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
+/// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values and
/// take a 2 x i32 value to shift plus a shift amount.
-SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
@@ -6710,12 +6725,18 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
unsigned ByteSize = SrcVT.getSizeInBits()/8;
- int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
- MachineMemOperand *MMO =
- DAG.getMachineFunction()
- .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
- MachineMemOperand::MOLoad, ByteSize, ByteSize);
-
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(StackSlot);
+ MachineMemOperand *MMO;
+ if (FI) {
+ int SSFI = FI->getIndex();
+ MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, ByteSize, ByteSize);
+ } else {
+ MMO = cast<LoadSDNode>(StackSlot)->getMemOperand();
+ StackSlot = StackSlot.getOperand(1);
+ }
SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
X86ISD::FILD, DL,
@@ -7206,6 +7227,17 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit);
}
+SDValue X86TargetLowering::LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) const {
+ SDValue N0 = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ // Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1).
+ SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0,
+ DAG.getConstant(1, VT));
+ return DAG.getNode(ISD::AND, dl, VT, xFGETSIGN, DAG.getConstant(1, VT));
+}
+
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
@@ -8781,16 +8813,71 @@ SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const {
return Res;
}
-SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
+
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
LLVMContext *Context = DAG.getContext();
- assert(Subtarget->hasSSE41() && "Cannot lower SHL without SSE4.1 or later");
+ // Must have SSE2.
+ if (!Subtarget->hasSSE2()) return SDValue();
+
+ // Optimize shl/srl/sra with constant shift amount.
+ if (isSplatVector(Amt.getNode())) {
+ SDValue SclrAmt = Amt->getOperand(0);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
+ uint64_t ShiftAmt = C->getZExtValue();
+
+ if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
+ R, DAG.getConstant(ShiftAmt, MVT::i32));
+ }
+ }
+
+ // Lower SHL with variable shift amount.
+ // Cannot lower SHL without SSE4.1 or later.
+ if (!Subtarget->hasSSE41()) return SDValue();
- if (VT == MVT::v4i32) {
+ if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
Op.getOperand(1), DAG.getConstant(23, MVT::i32));
@@ -8809,7 +8896,7 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
}
- if (VT == MVT::v16i8) {
+ if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
// a = a << 5;
Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
@@ -9114,7 +9201,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::SHL_PARTS:
case ISD::SRA_PARTS:
- case ISD::SRL_PARTS: return LowerShift(Op, DAG);
+ case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
@@ -9122,6 +9209,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FABS: return LowerFABS(Op, DAG);
case ISD::FNEG: return LowerFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::VSETCC: return LowerVSETCC(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
@@ -9142,7 +9230,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
case ISD::MUL: return LowerMUL_V2I64(Op, DAG);
- case ISD::SHL: return LowerSHL(Op, DAG);
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL: return LowerShift(Op, DAG);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
@@ -9309,6 +9399,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::UCOMI: return "X86ISD::UCOMI";
case X86ISD::SETCC: return "X86ISD::SETCC";
case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
+ case X86ISD::FSETCCsd: return "X86ISD::FSETCCsd";
+ case X86ISD::FSETCCss: return "X86ISD::FSETCCss";
case X86ISD::CMOV: return "X86ISD::CMOV";
case X86ISD::BRCOND: return "X86ISD::BRCOND";
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
@@ -10986,14 +11078,14 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
UE = Uses.end(); UI != UE; ++UI) {
SDNode *Extract = *UI;
// Compute the element's address.
SDValue Idx = Extract->getOperand(1);
unsigned EltSize =
InputVector.getValueType().getVectorElementType().getSizeInBits()/8;
uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
- SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(),
+ SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
StackPtr, OffsetVal);
// Load the scalar.
@@ -11266,15 +11358,28 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
return SDValue();
+ SDValue FalseOp = N->getOperand(0);
+ SDValue TrueOp = N->getOperand(1);
+ X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
+ SDValue Cond = N->getOperand(3);
+ if (CC == X86::COND_E || CC == X86::COND_NE) {
+ switch (Cond.getOpcode()) {
+ default: break;
+ case X86ISD::BSR:
+ case X86ISD::BSF:
+ // If the operand of BSR / BSF is proven never zero, then ZF cannot be set.
+ if (DAG.isKnownNeverZero(Cond.getOperand(0)))
+ return (CC == X86::COND_E) ? FalseOp : TrueOp;
+ }
+ }
+
// If this is a select between two integer constants, try to do some
// optimizations. Note that the operands are ordered the opposite of SELECT
// operands.
- if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
- if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(TrueOp)) {
+ if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(FalseOp)) {
// Canonicalize the TrueC/FalseC values so that TrueC (the true value) is
// larger than FalseC (the false value).
- X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
-
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
CC = X86::GetOppositeBranchCondition(CC);
std::swap(TrueC, FalseC);
@@ -11284,7 +11389,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
// This is efficient for any integer data type (including i8/i16) and
// shift amount.
if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
- SDValue Cond = N->getOperand(3);
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, MVT::i8), Cond);
@@ -11302,7 +11406,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
// Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. This is efficient
// for any integer data type, including i8/i16.
if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
- SDValue Cond = N->getOperand(3);
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, MVT::i8), Cond);
@@ -11341,7 +11444,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
if (isFastMultiplier) {
APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
- SDValue Cond = N->getOperand(3);
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, MVT::i8), Cond);
// Zero extend the condition if needed.
@@ -11576,12 +11678,94 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
}
+// CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..))
+// where both setccs reference the same FP CMP, and rewrite for CMPEQSS
+// and friends. Likewise for OR -> CMPNEQSS.
+static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ unsigned opcode;
+
+ // SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
+ // we're requiring SSE2 for both.
+ if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CMP0 = N0->getOperand(1);
+ SDValue CMP1 = N1->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // The SETCCs should both refer to the same CMP.
+ if (CMP0.getOpcode() != X86ISD::CMP || CMP0 != CMP1)
+ return SDValue();
+
+ SDValue CMP00 = CMP0->getOperand(0);
+ SDValue CMP01 = CMP0->getOperand(1);
+ EVT VT = CMP00.getValueType();
+
+ if (VT == MVT::f32 || VT == MVT::f64) {
+ bool ExpectingFlags = false;
+ // Check for any users that want flags:
+ for (SDNode::use_iterator UI = N->use_begin(),
+ UE = N->use_end();
+ !ExpectingFlags && UI != UE; ++UI)
+ switch (UI->getOpcode()) {
+ default:
+ case ISD::BR_CC:
+ case ISD::BRCOND:
+ case ISD::SELECT:
+ ExpectingFlags = true;
+ break;
+ case ISD::CopyToReg:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ break;
+ }
+
+ if (!ExpectingFlags) {
+ enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
+ enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0);
+
+ if (cc1 == X86::COND_E || cc1 == X86::COND_NE) {
+ X86::CondCode tmp = cc0;
+ cc0 = cc1;
+ cc1 = tmp;
+ }
+
+ if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
+ (cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
+ bool is64BitFP = (CMP00.getValueType() == MVT::f64);
+ X86ISD::NodeType NTOperator = is64BitFP ?
+ X86ISD::FSETCCsd : X86ISD::FSETCCss;
+ // FIXME: need symbolic constants for these magic numbers.
+ // See X86ATTInstPrinter.cpp:printSSECC().
+ unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
+ SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP00, CMP01,
+ DAG.getConstant(x86cc, MVT::i8));
+ SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32,
+ OnesOrZeroesF);
+ SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI,
+ DAG.getConstant(1, MVT::i32));
+ SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
+ return OneBitOfTruth;
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
+ SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
+
// Want to form PANDN nodes, in the hopes of then easily combining them with
// OR and AND nodes to form PBLEND/PSIGN.
EVT VT = N->getValueType(0);
@@ -11611,6 +11795,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
+ SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
+ if (R.getNode())
+ return R;
+
EVT VT = N->getValueType(0);
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64)
return SDValue();
@@ -11978,6 +12166,26 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86TargetLowering *XTLI) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Op0 = N->getOperand(0);
+ // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
+ // a 32-bit target where SSE doesn't support i64->FP operations.
+ if (Op0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
+ EVT VT = Ld->getValueType(0);
+ if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
+ ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
+ !XTLI->getSubtarget()->is64Bit() &&
+ !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0), Ld->getChain(), Op0, DAG);
+ DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
+ return FILDChain;
+ }
+ }
+ return SDValue();
+}
+
// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
X86TargetLowering::DAGCombinerInfo &DCI) {
@@ -12062,6 +12270,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
+ case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
@@ -12491,12 +12700,16 @@ LowerXConstraint(EVT ConstraintVT) const {
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- char Constraint,
+ std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result(0, 0);
- switch (Constraint) {
+ // Only support length 1 constraints for now.
+ if (Constraint.length() > 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
default: break;
case 'I':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
@@ -12688,7 +12901,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return std::make_pair(0U, X86::GR8RegisterClass);
if (VT == MVT::i16)
return std::make_pair(0U, X86::GR16RegisterClass);
- if (VT == MVT::i32 || !Subtarget->is64Bit())
+ if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget->is64Bit())
return std::make_pair(0U, X86::GR32RegisterClass);
return std::make_pair(0U, X86::GR64RegisterClass);
case 'R': // LEGACY_REGS
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
index 6301057..d61a125 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -94,6 +94,15 @@ namespace llvm {
// one's or all zero's.
SETCC_CARRY, // R = carry_bit ? ~0 : 0
+ /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
+ /// Operands are two FP values to compare; result is a mask of
+ /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
+ FSETCCss, FSETCCsd,
+
+ /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values,
+ /// result in an integer GPR. Needs masking for scalar result.
+ FGETSIGNx86,
+
/// X86 conditional moves. Operand 0 and operand 1 are the two values
/// to select from. Operand 2 is the condition code, and operand 3 is the
/// flag operand produced by a CMP or TEST instruction. It also writes a
@@ -592,7 +601,7 @@ namespace llvm {
/// true it means one of the asm constraint of the inline asm instruction
/// being processed is 'm'.
virtual void LowerAsmOperandForConstraint(SDValue Op,
- char ConstraintLetter,
+ std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
@@ -674,15 +683,15 @@ namespace llvm {
/// or null if the target does not support "fast" ISel.
virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
- /// getFunctionAlignment - Return the Log2 alignment of this function.
- virtual unsigned getFunctionAlignment(const Function *F) const;
-
/// getStackCookieLocation - Return true if the target stores stack
/// protector cookies at a fixed offset in some non-standard address
/// space, and populates the address space and offset as
/// appropriate.
virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const;
+ SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
+ SelectionDAG &DAG) const;
+
protected:
std::pair<const TargetRegisterClass*, uint8_t>
findRepresentativeClass(EVT VT) const;
@@ -773,9 +782,7 @@ namespace llvm {
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
- SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
- SelectionDAG &DAG) const;
+ SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const;
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
@@ -786,6 +793,7 @@ namespace llvm {
SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToBT(SDValue And, ISD::CondCode CC,
DebugLoc dl, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
@@ -808,7 +816,7 @@ namespace llvm {
SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSHL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
@@ -850,9 +858,10 @@ namespace llvm {
ISD::NodeType ExtendKind) const;
virtual bool
- CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- LLVMContext &Context) const;
+ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const;
void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned NewOp) const;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
index 4c915d9..33534cd 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -214,6 +214,30 @@ def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
(SETBr)>;
+// (add OP, SETB) -> (adc OP, 0)
+def : Pat<(add (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR8:$op),
+ (ADC8ri GR8:$op, 0)>;
+def : Pat<(add (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR32:$op),
+ (ADC32ri8 GR32:$op, 0)>;
+def : Pat<(add (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1), GR64:$op),
+ (ADC64ri8 GR64:$op, 0)>;
+
+// (sub OP, SETB) -> (sbb OP, 0)
+def : Pat<(sub GR8:$op, (and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
+ (SBB8ri GR8:$op, 0)>;
+def : Pat<(sub GR32:$op, (and (i32 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
+ (SBB32ri8 GR32:$op, 0)>;
+def : Pat<(sub GR64:$op, (and (i64 (X86setcc_c X86_COND_B, EFLAGS)), 1)),
+ (SBB64ri8 GR64:$op, 0)>;
+
+// (sub OP, SETCC_CARRY) -> (adc OP, 0)
+def : Pat<(sub GR8:$op, (i8 (X86setcc_c X86_COND_B, EFLAGS))),
+ (ADC8ri GR8:$op, 0)>;
+def : Pat<(sub GR32:$op, (i32 (X86setcc_c X86_COND_B, EFLAGS))),
+ (ADC32ri8 GR32:$op, 0)>;
+def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
+ (ADC64ri8 GR64:$op, 0)>;
+
//===----------------------------------------------------------------------===//
// String Pseudo Instructions
//
@@ -519,85 +543,98 @@ def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero),
Requires<[In64BitMode]>, LOCK;
-// Optimized codegen when the non-memory output is not used.
+// RegOpc corresponds to the mr version of the instruction
+// ImmOpc corresponds to the mi version of the instruction
+// ImmOpc8 corresponds to the mi8 version of the instruction
+// ImmMod corresponds to the instruction format of the mi and mi8 versions
+multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
+ Format ImmMod, string mnemonic> {
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
-def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- "lock\n\t"
- "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "lock\n\t"
- "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "lock\n\t"
- "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "lock\n\t"
- "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
- "lock\n\t"
- "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "lock\n\t"
- "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "lock\n\t"
- "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs),
- (ins i64mem:$dst, i64i32imm :$src2),
- "lock\n\t"
- "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-
-def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "lock\n\t"
- "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "lock\n\t"
- "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs),
- (ins i64mem:$dst, i64i8imm :$src2),
- "lock\n\t"
- "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-
-def LOCK_SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
- "lock\n\t"
- "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "lock\n\t"
- "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "lock\n\t"
- "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "lock\n\t"
- "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
+ MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ !strconcat("lock\n\t", mnemonic, "{b}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ !strconcat("lock\n\t", mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, OpSize, LOCK;
+def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ !strconcat("lock\n\t", mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
+ RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
+ MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ !strconcat("lock\n\t", mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
+ ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{b}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
+ ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
+ ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+
+def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
+ !strconcat("lock\n\t", mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ []>, LOCK;
+}
-def LOCK_SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
- "lock\n\t"
- "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "lock\n\t"
- "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "lock\n\t"
- "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs),
- (ins i64mem:$dst, i64i32imm:$src2),
- "lock\n\t"
- "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+}
+defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">;
+defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">;
+defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">;
+defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">;
+defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">;
-def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "lock\n\t"
- "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "lock\n\t"
- "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs),
- (ins i64mem:$dst, i64i8imm :$src2),
- "lock\n\t"
- "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+// Optimized codegen when the non-memory output is not used.
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
"lock\n\t"
@@ -960,7 +997,8 @@ def : Pat<(extloadi64i32 addr:$src),
// anyext. Define these to do an explicit zero-extend to
// avoid partial-register updates.
-def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
+def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG
+ (MOVZX32rr8 GR8 :$src), sub_16bit)>;
def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32.
@@ -1127,9 +1165,9 @@ def : Pat<(and GR32:$src1, 0xff),
Requires<[In32BitMode]>;
// r & (2^8-1) ==> movz
def : Pat<(and GR16:$src1, 0xff),
- (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1,
- GR16_ABCD)),
- sub_8bit))>,
+ (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG
+ (i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), sub_8bit)),
+ sub_16bit)>,
Requires<[In32BitMode]>;
// r & (2^32-1) ==> movz
@@ -1147,7 +1185,8 @@ def : Pat<(and GR32:$src1, 0xff),
Requires<[In64BitMode]>;
// r & (2^8-1) ==> movz
def : Pat<(and GR16:$src1, 0xff),
- (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>,
+ (EXTRACT_SUBREG (MOVZX32rr8 (i8
+ (EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>,
Requires<[In64BitMode]>;
@@ -1159,10 +1198,11 @@ def : Pat<(sext_inreg GR32:$src, i8),
GR32_ABCD)),
sub_8bit))>,
Requires<[In32BitMode]>;
+
def : Pat<(sext_inreg GR16:$src, i8),
- (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
- GR16_ABCD)),
- sub_8bit))>,
+ (EXTRACT_SUBREG (i32 (MOVSX32rr8 (EXTRACT_SUBREG
+ (i32 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit))),
+ sub_16bit)>,
Requires<[In32BitMode]>;
def : Pat<(sext_inreg GR64:$src, i32),
@@ -1175,9 +1215,19 @@ def : Pat<(sext_inreg GR32:$src, i8),
(MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
Requires<[In64BitMode]>;
def : Pat<(sext_inreg GR16:$src, i8),
- (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>,
+ (EXTRACT_SUBREG (MOVSX32rr8
+ (EXTRACT_SUBREG GR16:$src, sub_8bit)), sub_16bit)>,
Requires<[In64BitMode]>;
+// sext, sext_load, zext, zext_load
+def: Pat<(i16 (sext GR8:$src)),
+ (EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>;
+def: Pat<(sextloadi16i8 addr:$src),
+ (EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>;
+def: Pat<(i16 (zext GR8:$src)),
+ (EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>;
+def: Pat<(zextloadi16i8 addr:$src),
+ (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
// trunc patterns
def : Pat<(i16 (trunc GR32:$src)),
@@ -1474,12 +1524,6 @@ def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
(IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
-// Optimize multiply by 2 with EFLAGS result.
-let AddedComplexity = 2 in {
-def : Pat<(X86smul_flag GR16:$src1, 2), (ADD16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>;
-}
-
// Patterns for nodes that do not produce flags, for instructions that do.
// addition
diff --git a/contrib/llvm/lib/Target/X86/X86InstrExtension.td b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
index 867c0f8..2e1d523 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrExtension.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
@@ -38,22 +38,11 @@ let neverHasSideEffects = 1 in {
// Sign/Zero extenders
-// Use movsbl intead of movsbw; we don't care about the high 16 bits
-// of the register here. This has a smaller encoding and avoids a
-// partial-register update. Actual movsbw included for the disassembler.
-def MOVSX16rr8W : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
- "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVSX16rm8W : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
- "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-
-// FIXME: Use a pat pattern or define a syntax here.
-let isCodeGenOnly=1 in {
-def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
- "", [(set GR16:$dst, (sext GR8:$src))]>, TB;
-def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
- "", [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB;
-}
-def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
+def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sext GR8:$src))]>, TB;
def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
@@ -66,20 +55,10 @@ def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
-// Use movzbl intead of movzbw; we don't care about the high 16 bits
-// of the register here. This has a smaller encoding and avoids a
-// partial-register update. Actual movzbw included for the disassembler.
-def MOVZX16rr8W : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
- "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVZX16rm8W : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
- "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-// FIXME: Use a pat pattern or define a syntax here.
-let isCodeGenOnly=1 in {
-def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
- "", [(set GR16:$dst, (zext GR8:$src))]>, TB;
-def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
- "", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB;
-}
+def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zext GR8:$src))]>, TB;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 3cbfac1..7c9a9f7 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -38,8 +38,11 @@ def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
+def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>;
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
+def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>;
+def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
index 83f0260..e2016eb 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -2015,62 +2015,48 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
bool isStackAligned,
const TargetMachine &TM,
bool load) {
- switch (RC->getID()) {
+ switch (RC->getSize()) {
default:
- llvm_unreachable("Unknown regclass");
- case X86::GR64RegClassID:
- case X86::GR64_ABCDRegClassID:
- case X86::GR64_NOREXRegClassID:
- case X86::GR64_NOREX_NOSPRegClassID:
- case X86::GR64_NOSPRegClassID:
- case X86::GR64_TCRegClassID:
- case X86::GR64_TCW64RegClassID:
- return load ? X86::MOV64rm : X86::MOV64mr;
- case X86::GR32RegClassID:
- case X86::GR32_ABCDRegClassID:
- case X86::GR32_ADRegClassID:
- case X86::GR32_NOREXRegClassID:
- case X86::GR32_NOSPRegClassID:
- case X86::GR32_TCRegClassID:
- return load ? X86::MOV32rm : X86::MOV32mr;
- case X86::GR16RegClassID:
- case X86::GR16_ABCDRegClassID:
- case X86::GR16_NOREXRegClassID:
- return load ? X86::MOV16rm : X86::MOV16mr;
- case X86::GR8RegClassID:
- // Copying to or from a physical H register on x86-64 requires a NOREX
- // move. Otherwise use a normal move.
- if (isHReg(Reg) &&
- TM.getSubtarget<X86Subtarget>().is64Bit())
- return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
- else
- return load ? X86::MOV8rm : X86::MOV8mr;
- case X86::GR8_ABCD_LRegClassID:
- case X86::GR8_NOREXRegClassID:
- return load ? X86::MOV8rm :X86::MOV8mr;
- case X86::GR8_ABCD_HRegClassID:
+ llvm_unreachable("Unknown spill size");
+ case 1:
+ assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass");
if (TM.getSubtarget<X86Subtarget>().is64Bit())
- return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
- else
- return load ? X86::MOV8rm : X86::MOV8mr;
- case X86::RFP80RegClassID:
+ // Copying to or from a physical H register on x86-64 requires a NOREX
+ // move. Otherwise use a normal move.
+ if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC))
+ return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
+ return load ? X86::MOV8rm : X86::MOV8mr;
+ case 2:
+ assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
+ return load ? X86::MOV16rm : X86::MOV16mr;
+ case 4:
+ if (X86::GR32RegClass.hasSubClassEq(RC))
+ return load ? X86::MOV32rm : X86::MOV32mr;
+ if (X86::FR32RegClass.hasSubClassEq(RC))
+ return load ? X86::MOVSSrm : X86::MOVSSmr;
+ if (X86::RFP32RegClass.hasSubClassEq(RC))
+ return load ? X86::LD_Fp32m : X86::ST_Fp32m;
+ llvm_unreachable("Unknown 4-byte regclass");
+ case 8:
+ if (X86::GR64RegClass.hasSubClassEq(RC))
+ return load ? X86::MOV64rm : X86::MOV64mr;
+ if (X86::FR64RegClass.hasSubClassEq(RC))
+ return load ? X86::MOVSDrm : X86::MOVSDmr;
+ if (X86::VR64RegClass.hasSubClassEq(RC))
+ return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
+ if (X86::RFP64RegClass.hasSubClassEq(RC))
+ return load ? X86::LD_Fp64m : X86::ST_Fp64m;
+ llvm_unreachable("Unknown 8-byte regclass");
+ case 10:
+ assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
return load ? X86::LD_Fp80m : X86::ST_FpP80m;
- case X86::RFP64RegClassID:
- return load ? X86::LD_Fp64m : X86::ST_Fp64m;
- case X86::RFP32RegClassID:
- return load ? X86::LD_Fp32m : X86::ST_Fp32m;
- case X86::FR32RegClassID:
- return load ? X86::MOVSSrm : X86::MOVSSmr;
- case X86::FR64RegClassID:
- return load ? X86::MOVSDrm : X86::MOVSDmr;
- case X86::VR128RegClassID:
+ case 16:
+ assert(X86::VR128RegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass");
// If stack is realigned we can use aligned stores.
if (isStackAligned)
return load ? X86::MOVAPSrm : X86::MOVAPSmr;
else
return load ? X86::MOVUPSrm : X86::MOVUPSmr;
- case X86::VR64RegClassID:
- return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
}
}
@@ -2434,7 +2420,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = 4;
break;
default:
- llvm_unreachable("Don't know how to fold this instruction!");
+ return 0;
}
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
index 8da68b5..d895023 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
@@ -449,7 +449,6 @@ namespace X86II {
SSEDomainShift = SegOvrShift + 2,
OpcodeShift = SSEDomainShift + 2,
- OpcodeMask = 0xFFULL << OpcodeShift,
//===------------------------------------------------------------------===//
/// VEX - The opcode prefix used by AVX instructions
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
index 03a0b0c..8cab808 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -23,6 +23,9 @@ def SDTIntShiftDOp: SDTypeProfile<1, 3,
def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>;
+def SDTX86Cmpsd : SDTypeProfile<1, 3, [SDTCisVT<0, f64>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+def SDTX86Cmpss : SDTypeProfile<1, 3, [SDTCisVT<0, f32>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+
def SDTX86Cmov : SDTypeProfile<1, 4,
[SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
@@ -1485,6 +1488,7 @@ defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;
// solely because gas supports it.
def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op), 0>;
def : InstAlias<"fmulp %st(0), $op", (MUL_FPrST0 RST:$op)>;
+def : InstAlias<"fsubp %st(0), $op", (SUBR_FPrST0 RST:$op)>;
def : InstAlias<"fsubrp %st(0), $op", (SUB_FPrST0 RST:$op)>;
def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>;
def : InstAlias<"fdivrp %st(0), $op", (DIV_FPrST0 RST:$op)>;
@@ -1546,8 +1550,8 @@ def : InstAlias<"movq $src, $dst",
def : InstAlias<"movsd", (MOVSD)>;
// movsx aliases
-def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8:$src), 0>;
-def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0>;
def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>;
def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>;
def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>;
@@ -1555,8 +1559,8 @@ def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>;
def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>;
// movzx aliases
-def : InstAlias<"movzx $src, $dst", (MOVZX16rr8W GR16:$dst, GR8:$src), 0>;
-def : InstAlias<"movzx $src, $dst", (MOVZX16rm8W GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0>;
def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>;
def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>;
def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src), 0>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
index bb2165a..b2d9fca 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
@@ -285,7 +285,7 @@ let Constraints = "$src1 = $dst" in
defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand, 1>;
defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por, 1>;
defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor, 1>;
-defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn, 1>;
+defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn>;
// Shift Instructions
defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
index cde3f6b..b64c03a 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1056,13 +1056,37 @@ let neverHasSideEffects = 1 in {
XD, VEX_4V;
}
+let Constraints = "$src1 = $dst" in {
+def CMPSSrr : SIi8<0xC2, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, SSECC:$cc),
+ "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), FR32:$src2, imm:$cc))]>, XS;
+def CMPSSrm : SIi8<0xC2, MRMSrcMem,
+ (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, SSECC:$cc),
+ "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), (loadf32 addr:$src2), imm:$cc))]>, XS;
+def CMPSDrr : SIi8<0xC2, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, SSECC:$cc),
+ "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), FR64:$src2, imm:$cc))]>, XD;
+def CMPSDrm : SIi8<0xC2, MRMSrcMem,
+ (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, SSECC:$cc),
+ "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), (loadf64 addr:$src2), imm:$cc))]>, XD;
+}
let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
- defm CMPSS : sse12_cmp_scalar<FR32, f32mem,
- "cmp${cc}ss\t{$src, $dst|$dst, $src}",
- "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}">, XS;
- defm CMPSD : sse12_cmp_scalar<FR64, f64mem,
- "cmp${cc}sd\t{$src, $dst|$dst, $src}",
- "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}">, XD;
+def CMPSSrr_alt : SIi8<0xC2, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2),
+ "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS;
+def CMPSSrm_alt : SIi8<0xC2, MRMSrcMem,
+ (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2),
+ "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS;
+def CMPSDrr_alt : SIi8<0xC2, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2),
+ "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD;
+def CMPSDrm_alt : SIi8<0xC2, MRMSrcMem,
+ (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2),
+ "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD;
}
multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
@@ -1327,11 +1351,6 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
}
// Mask creation
-defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
- SSEPackedSingle>, TB;
-defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
- SSEPackedDouble>, TB, OpSize;
-
defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
"movmskps", SSEPackedSingle>, VEX;
defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
@@ -1342,6 +1361,24 @@ defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
"movmskpd", SSEPackedDouble>, OpSize,
VEX;
+defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
+ SSEPackedSingle>, TB;
+defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
+ SSEPackedDouble>, TB, OpSize;
+
+// X86fgetsign
+def MOVMSKPDrr32_alt : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (X86fgetsign FR64:$src))], SSEPackedDouble>, TB, OpSize;
+def MOVMSKPDrr64_alt : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (X86fgetsign FR64:$src))], SSEPackedDouble>, TB, OpSize;
+def MOVMSKPSrr32_alt : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
+ "movmskps\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (X86fgetsign FR32:$src))], SSEPackedSingle>, TB;
+def MOVMSKPSrr64_alt : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
+ "movmskps\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (X86fgetsign FR32:$src))], SSEPackedSingle>, TB;
// Assembler Only
def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
@@ -1875,21 +1912,6 @@ defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
// SSE 1 & 2 - Non-temporal stores
//===----------------------------------------------------------------------===//
-def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs),
- (ins i128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX;
-def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs),
- (ins i128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX;
-
-let ExeDomain = SSEPackedInt in
- def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX;
-
let AddedComplexity = 400 in { // Prefer non-temporal versions
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
@@ -1906,12 +1928,16 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2f64 VR128:$src),
addr:$dst)]>, VEX;
+
let ExeDomain = SSEPackedInt in
- def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src),
- addr:$dst)]>, VEX;
+ def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+
+ def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
@@ -1943,18 +1969,6 @@ def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src),
def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
(VMOVNTPSYmr addr:$dst, VR256:$src)>;
-def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
-def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
-
-let ExeDomain = SSEPackedInt in
-def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
-
let AddedComplexity = 400 in { // Prefer non-temporal versions
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
@@ -1972,22 +1986,19 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>;
+
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti\t{$src, $dst|$dst, $src}",
[(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
TB, Requires<[HasSSE2]>;
-
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"movnti\t{$src, $dst|$dst, $src}",
[(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
TB, Requires<[HasSSE2]>;
-
}
-def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "movnti\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
- TB, Requires<[HasSSE2]>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Misc Instructions (No AVX form)
@@ -4733,14 +4744,14 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr,
- "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
+ "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
OpSize;
def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr,
- "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
+ "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
(IntId VR128:$src1,
(bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
@@ -4961,66 +4972,66 @@ defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
// This set of instructions are only rm, the only difference is the size
// of r and m.
let Constraints = "$src1 = $dst" in {
- def CRC32m8 : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst),
+ def CRC32r32m8 : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i8mem:$src2),
"crc32{b} \t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
- (int_x86_sse42_crc32_8 GR32:$src1,
+ (int_x86_sse42_crc32_32_8 GR32:$src1,
(load addr:$src2)))]>;
- def CRC32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
+ def CRC32r32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR8:$src2),
"crc32{b} \t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
- (int_x86_sse42_crc32_8 GR32:$src1, GR8:$src2))]>;
- def CRC32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
+ (int_x86_sse42_crc32_32_8 GR32:$src1, GR8:$src2))]>;
+ def CRC32r32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i16mem:$src2),
"crc32{w} \t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
- (int_x86_sse42_crc32_16 GR32:$src1,
+ (int_x86_sse42_crc32_32_16 GR32:$src1,
(load addr:$src2)))]>,
OpSize;
- def CRC32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
+ def CRC32r32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR16:$src2),
"crc32{w} \t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
- (int_x86_sse42_crc32_16 GR32:$src1, GR16:$src2))]>,
+ (int_x86_sse42_crc32_32_16 GR32:$src1, GR16:$src2))]>,
OpSize;
- def CRC32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
+ def CRC32r32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"crc32{l} \t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
- (int_x86_sse42_crc32_32 GR32:$src1,
+ (int_x86_sse42_crc32_32_32 GR32:$src1,
(load addr:$src2)))]>;
- def CRC32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
+ def CRC32r32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"crc32{l} \t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
- (int_x86_sse42_crc32_32 GR32:$src1, GR32:$src2))]>;
- def CRC64m8 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
+ (int_x86_sse42_crc32_32_32 GR32:$src1, GR32:$src2))]>;
+ def CRC32r64m8 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i8mem:$src2),
"crc32{b} \t{$src2, $src1|$src1, $src2}",
[(set GR64:$dst,
- (int_x86_sse42_crc64_8 GR64:$src1,
+ (int_x86_sse42_crc32_64_8 GR64:$src1,
(load addr:$src2)))]>,
REX_W;
- def CRC64r8 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
+ def CRC32r64r8 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR8:$src2),
"crc32{b} \t{$src2, $src1|$src1, $src2}",
[(set GR64:$dst,
- (int_x86_sse42_crc64_8 GR64:$src1, GR8:$src2))]>,
+ (int_x86_sse42_crc32_64_8 GR64:$src1, GR8:$src2))]>,
REX_W;
- def CRC64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst),
+ def CRC32r64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"crc32{q} \t{$src2, $src1|$src1, $src2}",
[(set GR64:$dst,
- (int_x86_sse42_crc64_64 GR64:$src1,
+ (int_x86_sse42_crc32_64_64 GR64:$src1,
(load addr:$src2)))]>,
REX_W;
- def CRC64r64 : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst),
+ def CRC32r64r64 : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"crc32{q} \t{$src2, $src1|$src1, $src2}",
[(set GR64:$dst,
- (int_x86_sse42_crc64_64 GR64:$src1, GR64:$src2))]>,
+ (int_x86_sse42_crc32_64_64 GR64:$src1, GR64:$src2))]>,
REX_W;
}
diff --git a/contrib/llvm/lib/Target/X86/X86MCAsmInfo.cpp b/contrib/llvm/lib/Target/X86/X86MCAsmInfo.cpp
index 83bba52..2e1ec63 100644
--- a/contrib/llvm/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MCAsmInfo.cpp
@@ -108,8 +108,6 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
- DwarfRequiresFrameSection = false;
-
// OpenBSD has buggy support for .quad in 32-bit mode, just split into two
// .words.
if (T.getOS() == Triple::OpenBSD && T.getArch() == Triple::x86)
diff --git a/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp
index f195a67..55aceba 100644
--- a/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -1015,7 +1015,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
} else {
unsigned FixupKind;
// FIXME: Is there a better way to know that we need a signed relocation?
- if (MI.getOpcode() == X86::MOV64ri32 ||
+ if (MI.getOpcode() == X86::ADD64ri32 ||
+ MI.getOpcode() == X86::MOV64ri32 ||
MI.getOpcode() == X86::MOV64mi32 ||
MI.getOpcode() == X86::PUSH64i32)
FixupKind = X86::reloc_signed_4byte;
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
index cbe6db2..793156f 100644
--- a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -355,10 +355,6 @@ ReSimplify:
assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 &&
"LEA has segment specified!");
break;
- case X86::MOVZX16rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break;
- case X86::MOVZX16rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break;
- case X86::MOVSX16rr8: LowerSubReg32_Op0(OutMI, X86::MOVSX32rr8); break;
- case X86::MOVSX16rm8: LowerSubReg32_Op0(OutMI, X86::MOVSX32rm8); break;
case X86::MOVZX64rr32: LowerSubReg32_Op0(OutMI, X86::MOV32rr); break;
case X86::MOVZX64rm32: LowerSubReg32_Op0(OutMI, X86::MOV32rm); break;
case X86::MOV64ri64i32: LowerSubReg32_Op0(OutMI, X86::MOV32ri); break;
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 37fb0fe..1ad6203 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -73,29 +73,61 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
}
}
-/// getDwarfRegNum - This function maps LLVM register identifiers to the DWARF
-/// specific numbering, used in debug info and exception tables.
-int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
- const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
- unsigned Flavour = DWARFFlavour::X86_64;
-
+static unsigned getFlavour(const X86Subtarget *Subtarget, bool isEH) {
if (!Subtarget->is64Bit()) {
if (Subtarget->isTargetDarwin()) {
if (isEH)
- Flavour = DWARFFlavour::X86_32_DarwinEH;
+ return DWARFFlavour::X86_32_DarwinEH;
else
- Flavour = DWARFFlavour::X86_32_Generic;
+ return DWARFFlavour::X86_32_Generic;
} else if (Subtarget->isTargetCygMing()) {
// Unsupported by now, just quick fallback
- Flavour = DWARFFlavour::X86_32_Generic;
+ return DWARFFlavour::X86_32_Generic;
} else {
- Flavour = DWARFFlavour::X86_32_Generic;
+ return DWARFFlavour::X86_32_Generic;
}
}
+ return DWARFFlavour::X86_64;
+}
+
+/// getDwarfRegNum - This function maps LLVM register identifiers to the DWARF
+/// specific numbering, used in debug info and exception tables.
+int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ unsigned Flavour = getFlavour(Subtarget, isEH);
return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour);
}
+/// getLLVMRegNum - This function maps DWARF register numbers to LLVM register.
+int X86RegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ unsigned Flavour = getFlavour(Subtarget, isEH);
+
+ return X86GenRegisterInfo::getLLVMRegNumFull(DwarfRegNo, Flavour);
+}
+
+int
+X86RegisterInfo::getSEHRegNum(unsigned i) const {
+ int reg = getX86RegNum(i);
+ switch (i) {
+ case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
+ case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
+ case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
+ case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
+ case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
+ case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
+ case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
+ case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
+ case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
+ case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
+ case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+ reg += 8;
+ }
+ return reg;
+}
+
/// getX86RegNum - This function maps LLVM register identifiers to their X86
/// specific numbering, which is used in various places encoding instructions.
unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
@@ -229,19 +261,13 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
}
break;
case X86::sub_8bit_hi:
- if (B == &X86::GR8_ABCD_HRegClass) {
- if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
- A == &X86::GR64_NOREXRegClass ||
- A == &X86::GR64_NOSPRegClass ||
- A == &X86::GR64_NOREX_NOSPRegClass)
- return &X86::GR64_ABCDRegClass;
- else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
- A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
- return &X86::GR32_ABCDRegClass;
- else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
- A == &X86::GR16_NOREXRegClass)
- return &X86::GR16_ABCDRegClass;
- }
+ if (B->hasSubClassEq(&X86::GR8_ABCD_HRegClass))
+ switch (A->getSize()) {
+ case 2: return getCommonSubClass(A, &X86::GR16_ABCDRegClass);
+ case 4: return getCommonSubClass(A, &X86::GR32_ABCDRegClass);
+ case 8: return getCommonSubClass(A, &X86::GR64_ABCDRegClass);
+ default: return 0;
+ }
break;
case X86::sub_16bit:
if (B == &X86::GR16RegClass) {
@@ -285,9 +311,16 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
A == &X86::GR64_NOREX_NOSPRegClass)
return &X86::GR64_ABCDRegClass;
} else if (B == &X86::GR32_NOREXRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass)
+ return &X86::GR64_NOREXRegClass;
+ else if (A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
+ return &X86::GR64_NOREX_NOSPRegClass;
+ else if (A == &X86::GR64_ABCDRegClass)
+ return &X86::GR64_ABCDRegClass;
+ } else if (B == &X86::GR32_NOREX_NOSPRegClass) {
if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
- return &X86::GR64_NOREXRegClass;
+ return &X86::GR64_NOREX_NOSPRegClass;
else if (A == &X86::GR64_ABCDRegClass)
return &X86::GR64_ABCDRegClass;
}
@@ -473,6 +506,34 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(X86::ST5);
Reserved.set(X86::ST6);
Reserved.set(X86::ST7);
+
+ // Mark the segment registers as reserved.
+ Reserved.set(X86::CS);
+ Reserved.set(X86::SS);
+ Reserved.set(X86::DS);
+ Reserved.set(X86::ES);
+ Reserved.set(X86::FS);
+ Reserved.set(X86::GS);
+
+ // Reserve the registers that only exist in 64-bit mode.
+ if (!Is64Bit) {
+ for (unsigned n = 0; n != 8; ++n) {
+ const unsigned GPR64[] = {
+ X86::R8, X86::R9, X86::R10, X86::R11,
+ X86::R12, X86::R13, X86::R14, X86::R15
+ };
+ for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI;
+ ++AI)
+ Reserved.set(Reg);
+
+ // XMM8, XMM9, ...
+ assert(X86::XMM15 == X86::XMM8+7);
+ for (const unsigned *AI = getOverlaps(X86::XMM8 + n); unsigned Reg = *AI;
+ ++AI)
+ Reserved.set(Reg);
+ }
+ }
+
return Reserved;
}
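
The getReservedRegs() hunk above marks the segment registers as always reserved and, when not targeting 64-bit mode, walks getOverlaps() so that R8-R15 and XMM8-XMM15 are reserved together with every register that aliases them. The following is only a minimal standalone sketch of that overlap walk; the Reg enum, ExampleOverlaps table and reserveWithOverlaps helper are invented for illustration and are not LLVM APIs.

    // Toy illustration of reserving a register plus everything that overlaps it.
    #include <bitset>
    #include <cstdio>

    enum Reg { NoReg, R8, R8D, R8W, R8B, NumRegs };

    // Zero-terminated overlap lists, mirroring the getOverlaps() idiom in the patch.
    static const unsigned ExampleOverlaps[NumRegs][5] = {
      {0},                      // NoReg
      {R8,  R8D, R8W, R8B, 0},  // R8 overlaps its 32/16/8-bit views
      {R8D, R8,  R8W, R8B, 0},
      {R8W, R8,  R8D, R8B, 0},
      {R8B, R8,  R8D, R8W, 0},
    };

    static void reserveWithOverlaps(std::bitset<NumRegs> &Reserved, unsigned R) {
      // Same loop shape as "for (const unsigned *AI = getOverlaps(...); unsigned Reg = *AI; ++AI)".
      for (const unsigned *AI = ExampleOverlaps[R]; unsigned Overlap = *AI; ++AI)
        Reserved.set(Overlap);
    }

    int main() {
      std::bitset<NumRegs> Reserved;
      const bool Is64Bit = false;           // pretend we target i386
      if (!Is64Bit)
        reserveWithOverlaps(Reserved, R8);  // also reserves R8D, R8W, R8B
      std::printf("reserved: %zu of %u\n", Reserved.count(), unsigned(NumRegs));
      return 0;
    }
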
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
index 9970c52..dd3d3dc 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -80,6 +80,10 @@ public:
/// getDwarfRegNum - allows modification of X86GenRegisterInfo::getDwarfRegNum
/// (created by TableGen) for target dependencies.
int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;
+
+ // FIXME: This should be tablegen'd like getDwarfRegNum is
+ int getSEHRegNum(unsigned i) const;
/// Code Generation virtual methods...
///
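
The header now declares getSEHRegNum(), which the hunk near the top of the X86RegisterInfo.cpp diff appears to implement: start from the legacy 3-bit register encoding and add 8 for any register that requires a REX prefix. A toy standalone sketch of that mapping follows; ToyReg, toyBaseRegNum and toySEHRegNum are invented names, not LLVM code.

    #include <cassert>

    enum ToyReg { RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
                  R8, R9, R10, R11, R12, R13, R14, R15 };

    static unsigned toyBaseRegNum(ToyReg R) {
      return static_cast<unsigned>(R) & 7;  // legacy 3-bit encoding (roughly getX86RegNum)
    }

    static unsigned toySEHRegNum(ToyReg R) {
      unsigned N = toyBaseRegNum(R);
      if (R >= R8)                          // REX-extended register: the patch's switch adds 8
        N += 8;
      return N;
    }

    int main() {
      assert(toySEHRegNum(RAX) == 0);
      assert(toySEHRegNum(RSP) == 4);
      assert(toySEHRegNum(R10) == 10);
      return 0;
    }
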
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
index fd7a247..f1d149c 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -41,82 +41,82 @@ let Namespace = "X86" in {
// 8-bit registers
// Low registers
- def AL : Register<"al">, DwarfRegNum<[0, 0, 0]>;
- def DL : Register<"dl">, DwarfRegNum<[1, 2, 2]>;
- def CL : Register<"cl">, DwarfRegNum<[2, 1, 1]>;
- def BL : Register<"bl">, DwarfRegNum<[3, 3, 3]>;
+ def AL : Register<"al">;
+ def DL : Register<"dl">;
+ def CL : Register<"cl">;
+ def BL : Register<"bl">;
// X86-64 only, requires REX.
let CostPerUse = 1 in {
- def SIL : Register<"sil">, DwarfRegNum<[4, 6, 6]>;
- def DIL : Register<"dil">, DwarfRegNum<[5, 7, 7]>;
- def BPL : Register<"bpl">, DwarfRegNum<[6, 4, 5]>;
- def SPL : Register<"spl">, DwarfRegNum<[7, 5, 4]>;
- def R8B : Register<"r8b">, DwarfRegNum<[8, -2, -2]>;
- def R9B : Register<"r9b">, DwarfRegNum<[9, -2, -2]>;
- def R10B : Register<"r10b">, DwarfRegNum<[10, -2, -2]>;
- def R11B : Register<"r11b">, DwarfRegNum<[11, -2, -2]>;
- def R12B : Register<"r12b">, DwarfRegNum<[12, -2, -2]>;
- def R13B : Register<"r13b">, DwarfRegNum<[13, -2, -2]>;
- def R14B : Register<"r14b">, DwarfRegNum<[14, -2, -2]>;
- def R15B : Register<"r15b">, DwarfRegNum<[15, -2, -2]>;
+ def SIL : Register<"sil">;
+ def DIL : Register<"dil">;
+ def BPL : Register<"bpl">;
+ def SPL : Register<"spl">;
+ def R8B : Register<"r8b">;
+ def R9B : Register<"r9b">;
+ def R10B : Register<"r10b">;
+ def R11B : Register<"r11b">;
+ def R12B : Register<"r12b">;
+ def R13B : Register<"r13b">;
+ def R14B : Register<"r14b">;
+ def R15B : Register<"r15b">;
}
// High registers. On x86-64, these cannot be used in any instruction
// with a REX prefix.
- def AH : Register<"ah">, DwarfRegNum<[0, 0, 0]>;
- def DH : Register<"dh">, DwarfRegNum<[1, 2, 2]>;
- def CH : Register<"ch">, DwarfRegNum<[2, 1, 1]>;
- def BH : Register<"bh">, DwarfRegNum<[3, 3, 3]>;
+ def AH : Register<"ah">;
+ def DH : Register<"dh">;
+ def CH : Register<"ch">;
+ def BH : Register<"bh">;
// 16-bit registers
let SubRegIndices = [sub_8bit, sub_8bit_hi] in {
- def AX : RegisterWithSubRegs<"ax", [AL,AH]>, DwarfRegNum<[0, 0, 0]>;
- def DX : RegisterWithSubRegs<"dx", [DL,DH]>, DwarfRegNum<[1, 2, 2]>;
- def CX : RegisterWithSubRegs<"cx", [CL,CH]>, DwarfRegNum<[2, 1, 1]>;
- def BX : RegisterWithSubRegs<"bx", [BL,BH]>, DwarfRegNum<[3, 3, 3]>;
+ def AX : RegisterWithSubRegs<"ax", [AL,AH]>;
+ def DX : RegisterWithSubRegs<"dx", [DL,DH]>;
+ def CX : RegisterWithSubRegs<"cx", [CL,CH]>;
+ def BX : RegisterWithSubRegs<"bx", [BL,BH]>;
}
let SubRegIndices = [sub_8bit] in {
- def SI : RegisterWithSubRegs<"si", [SIL]>, DwarfRegNum<[4, 6, 6]>;
- def DI : RegisterWithSubRegs<"di", [DIL]>, DwarfRegNum<[5, 7, 7]>;
- def BP : RegisterWithSubRegs<"bp", [BPL]>, DwarfRegNum<[6, 4, 5]>;
- def SP : RegisterWithSubRegs<"sp", [SPL]>, DwarfRegNum<[7, 5, 4]>;
+ def SI : RegisterWithSubRegs<"si", [SIL]>;
+ def DI : RegisterWithSubRegs<"di", [DIL]>;
+ def BP : RegisterWithSubRegs<"bp", [BPL]>;
+ def SP : RegisterWithSubRegs<"sp", [SPL]>;
}
- def IP : Register<"ip">, DwarfRegNum<[16]>;
+ def IP : Register<"ip">;
// X86-64 only, requires REX.
let SubRegIndices = [sub_8bit], CostPerUse = 1 in {
- def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>;
- def R9W : RegisterWithSubRegs<"r9w", [R9B]>, DwarfRegNum<[9, -2, -2]>;
- def R10W : RegisterWithSubRegs<"r10w", [R10B]>, DwarfRegNum<[10, -2, -2]>;
- def R11W : RegisterWithSubRegs<"r11w", [R11B]>, DwarfRegNum<[11, -2, -2]>;
- def R12W : RegisterWithSubRegs<"r12w", [R12B]>, DwarfRegNum<[12, -2, -2]>;
- def R13W : RegisterWithSubRegs<"r13w", [R13B]>, DwarfRegNum<[13, -2, -2]>;
- def R14W : RegisterWithSubRegs<"r14w", [R14B]>, DwarfRegNum<[14, -2, -2]>;
- def R15W : RegisterWithSubRegs<"r15w", [R15B]>, DwarfRegNum<[15, -2, -2]>;
+ def R8W : RegisterWithSubRegs<"r8w", [R8B]>;
+ def R9W : RegisterWithSubRegs<"r9w", [R9B]>;
+ def R10W : RegisterWithSubRegs<"r10w", [R10B]>;
+ def R11W : RegisterWithSubRegs<"r11w", [R11B]>;
+ def R12W : RegisterWithSubRegs<"r12w", [R12B]>;
+ def R13W : RegisterWithSubRegs<"r13w", [R13B]>;
+ def R14W : RegisterWithSubRegs<"r14w", [R14B]>;
+ def R15W : RegisterWithSubRegs<"r15w", [R15B]>;
}
// 32-bit registers
let SubRegIndices = [sub_16bit] in {
- def EAX : RegisterWithSubRegs<"eax", [AX]>, DwarfRegNum<[0, 0, 0]>;
- def EDX : RegisterWithSubRegs<"edx", [DX]>, DwarfRegNum<[1, 2, 2]>;
- def ECX : RegisterWithSubRegs<"ecx", [CX]>, DwarfRegNum<[2, 1, 1]>;
- def EBX : RegisterWithSubRegs<"ebx", [BX]>, DwarfRegNum<[3, 3, 3]>;
- def ESI : RegisterWithSubRegs<"esi", [SI]>, DwarfRegNum<[4, 6, 6]>;
- def EDI : RegisterWithSubRegs<"edi", [DI]>, DwarfRegNum<[5, 7, 7]>;
- def EBP : RegisterWithSubRegs<"ebp", [BP]>, DwarfRegNum<[6, 4, 5]>;
- def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[7, 5, 4]>;
- def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>;
+ def EAX : RegisterWithSubRegs<"eax", [AX]>, DwarfRegNum<[-2, 0, 0]>;
+ def EDX : RegisterWithSubRegs<"edx", [DX]>, DwarfRegNum<[-2, 2, 2]>;
+ def ECX : RegisterWithSubRegs<"ecx", [CX]>, DwarfRegNum<[-2, 1, 1]>;
+ def EBX : RegisterWithSubRegs<"ebx", [BX]>, DwarfRegNum<[-2, 3, 3]>;
+ def ESI : RegisterWithSubRegs<"esi", [SI]>, DwarfRegNum<[-2, 6, 6]>;
+ def EDI : RegisterWithSubRegs<"edi", [DI]>, DwarfRegNum<[-2, 7, 7]>;
+ def EBP : RegisterWithSubRegs<"ebp", [BP]>, DwarfRegNum<[-2, 4, 5]>;
+ def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[-2, 5, 4]>;
+ def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[-2, 8, 8]>;
// X86-64 only, requires REX
let CostPerUse = 1 in {
- def R8D : RegisterWithSubRegs<"r8d", [R8W]>, DwarfRegNum<[8, -2, -2]>;
- def R9D : RegisterWithSubRegs<"r9d", [R9W]>, DwarfRegNum<[9, -2, -2]>;
- def R10D : RegisterWithSubRegs<"r10d", [R10W]>, DwarfRegNum<[10, -2, -2]>;
- def R11D : RegisterWithSubRegs<"r11d", [R11W]>, DwarfRegNum<[11, -2, -2]>;
- def R12D : RegisterWithSubRegs<"r12d", [R12W]>, DwarfRegNum<[12, -2, -2]>;
- def R13D : RegisterWithSubRegs<"r13d", [R13W]>, DwarfRegNum<[13, -2, -2]>;
- def R14D : RegisterWithSubRegs<"r14d", [R14W]>, DwarfRegNum<[14, -2, -2]>;
- def R15D : RegisterWithSubRegs<"r15d", [R15W]>, DwarfRegNum<[15, -2, -2]>;
+ def R8D : RegisterWithSubRegs<"r8d", [R8W]>;
+ def R9D : RegisterWithSubRegs<"r9d", [R9W]>;
+ def R10D : RegisterWithSubRegs<"r10d", [R10W]>;
+ def R11D : RegisterWithSubRegs<"r11d", [R11W]>;
+ def R12D : RegisterWithSubRegs<"r12d", [R12W]>;
+ def R13D : RegisterWithSubRegs<"r13d", [R13W]>;
+ def R14D : RegisterWithSubRegs<"r14d", [R14W]>;
+ def R15D : RegisterWithSubRegs<"r15d", [R15W]>;
}}
// 64-bit registers, X86-64 only
@@ -188,22 +188,22 @@ let Namespace = "X86" in {
// YMM Registers, used by AVX instructions
let SubRegIndices = [sub_xmm] in {
- def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>;
- def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>;
- def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>;
- def YMM3: RegisterWithSubRegs<"ymm3", [XMM3]>, DwarfRegNum<[20, 24, 24]>;
- def YMM4: RegisterWithSubRegs<"ymm4", [XMM4]>, DwarfRegNum<[21, 25, 25]>;
- def YMM5: RegisterWithSubRegs<"ymm5", [XMM5]>, DwarfRegNum<[22, 26, 26]>;
- def YMM6: RegisterWithSubRegs<"ymm6", [XMM6]>, DwarfRegNum<[23, 27, 27]>;
- def YMM7: RegisterWithSubRegs<"ymm7", [XMM7]>, DwarfRegNum<[24, 28, 28]>;
- def YMM8: RegisterWithSubRegs<"ymm8", [XMM8]>, DwarfRegNum<[25, -2, -2]>;
- def YMM9: RegisterWithSubRegs<"ymm9", [XMM9]>, DwarfRegNum<[26, -2, -2]>;
- def YMM10: RegisterWithSubRegs<"ymm10", [XMM10]>, DwarfRegNum<[27, -2, -2]>;
- def YMM11: RegisterWithSubRegs<"ymm11", [XMM11]>, DwarfRegNum<[28, -2, -2]>;
- def YMM12: RegisterWithSubRegs<"ymm12", [XMM12]>, DwarfRegNum<[29, -2, -2]>;
- def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegNum<[30, -2, -2]>;
- def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegNum<[31, -2, -2]>;
- def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegNum<[32, -2, -2]>;
+ def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegAlias<XMM0>;
+ def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegAlias<XMM1>;
+ def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegAlias<XMM2>;
+ def YMM3: RegisterWithSubRegs<"ymm3", [XMM3]>, DwarfRegAlias<XMM3>;
+ def YMM4: RegisterWithSubRegs<"ymm4", [XMM4]>, DwarfRegAlias<XMM4>;
+ def YMM5: RegisterWithSubRegs<"ymm5", [XMM5]>, DwarfRegAlias<XMM5>;
+ def YMM6: RegisterWithSubRegs<"ymm6", [XMM6]>, DwarfRegAlias<XMM6>;
+ def YMM7: RegisterWithSubRegs<"ymm7", [XMM7]>, DwarfRegAlias<XMM7>;
+ def YMM8: RegisterWithSubRegs<"ymm8", [XMM8]>, DwarfRegAlias<XMM8>;
+ def YMM9: RegisterWithSubRegs<"ymm9", [XMM9]>, DwarfRegAlias<XMM9>;
+ def YMM10: RegisterWithSubRegs<"ymm10", [XMM10]>, DwarfRegAlias<XMM10>;
+ def YMM11: RegisterWithSubRegs<"ymm11", [XMM11]>, DwarfRegAlias<XMM11>;
+ def YMM12: RegisterWithSubRegs<"ymm12", [XMM12]>, DwarfRegAlias<XMM12>;
+ def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegAlias<XMM13>;
+ def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegAlias<XMM14>;
+ def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegAlias<XMM15>;
}
// Floating point stack registers
@@ -326,104 +326,12 @@ def GR16 : RegisterClass<"X86", [i16], 16,
[AX, CX, DX, SI, DI, BX, BP, SP,
R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned X86_GR16_AO_64[] = {
- X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
- X86::R8W, X86::R9W, X86::R10W, X86::R11W,
- X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W, X86::BP
- };
-
- GR16Class::iterator
- GR16Class::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (Subtarget.is64Bit())
- return X86_GR16_AO_64;
- else
- return begin();
- }
-
- GR16Class::iterator
- GR16Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- if (Subtarget.is64Bit()) {
- // Does the function dedicate RBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate SP or BP.
- return array_endof(X86_GR16_AO_64) - 1;
- else
- // If not, just don't allocate SP.
- return array_endof(X86_GR16_AO_64);
- } else {
- // Does the function dedicate EBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate SP or BP.
- return begin() + 6;
- else
- // If not, just don't allocate SP.
- return begin() + 7;
- }
- }
- }];
}
def GR32 : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned X86_GR32_AO_64[] = {
- X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
- X86::R8D, X86::R9D, X86::R10D, X86::R11D,
- X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP
- };
-
- GR32Class::iterator
- GR32Class::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (Subtarget.is64Bit())
- return X86_GR32_AO_64;
- else
- return begin();
- }
-
- GR32Class::iterator
- GR32Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- if (Subtarget.is64Bit()) {
- // Does the function dedicate RBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate ESP or EBP.
- return array_endof(X86_GR32_AO_64) - 1;
- else
- // If not, just don't allocate ESP.
- return array_endof(X86_GR32_AO_64);
- } else {
- // Does the function dedicate EBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate ESP or EBP.
- return begin() + 6;
- else
- // If not, just don't allocate ESP.
- return begin() + 7;
- }
- }
- }];
}
// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
@@ -435,25 +343,6 @@ def GR64 : RegisterClass<"X86", [i64], 64,
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
(GR16 sub_16bit),
(GR32 sub_32bit)];
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- GR64Class::iterator
- GR64Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- if (!Subtarget.is64Bit())
- return begin(); // None of these are allocatable in 32-bit.
- // Does the function dedicate RBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- return end()-3; // If so, don't allocate RIP, RSP or RBP
- else
- return end()-2; // If not, just don't allocate RIP or RSP
- }
- }];
}
// Segment registers for use by MOV instructions (and others) that have a
@@ -496,7 +385,7 @@ def GR32_TC : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX]> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
}
def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
- R8, R9, R11]> {
+ R8, R9, R11, RIP]> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
(GR16 sub_16bit),
(GR32_TC sub_32bit)];
@@ -543,48 +432,12 @@ def GR8_NOREX : RegisterClass<"X86", [i8], 8,
def GR16_NOREX : RegisterClass<"X86", [i16], 16,
[AX, CX, DX, SI, DI, BX, BP, SP]> {
let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi)];
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- GR16_NOREXClass::iterator
- GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- // Does the function dedicate RBP / EBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate SP or BP.
- return end() - 2;
- else
- // If not, just don't allocate SP.
- return end() - 1;
- }
- }];
}
// GR32_NOREX - GR32 registers which do not require a REX prefix.
def GR32_NOREX : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
(GR16_NOREX sub_16bit)];
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- GR32_NOREXClass::iterator
- GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- // Does the function dedicate RBP / EBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate ESP or EBP.
- return end() - 2;
- else
- // If not, just don't allocate ESP.
- return end() - 1;
- }
- }];
}
// GR64_NOREX - GR64 registers which do not require a REX prefix.
def GR64_NOREX : RegisterClass<"X86", [i64], 64,
@@ -592,24 +445,6 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64,
let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
(GR16_NOREX sub_16bit),
(GR32_NOREX sub_32bit)];
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- GR64_NOREXClass::iterator
- GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- // Does the function dedicate RBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate RIP, RSP or RBP.
- return end() - 3;
- else
- // If not, just don't allocate RIP or RSP.
- return end() - 2;
- }
- }];
}
// GR32_NOSP - GR32 registers except ESP.
@@ -617,52 +452,6 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP,
R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned X86_GR32_NOSP_AO_64[] = {
- X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
- X86::R8D, X86::R9D, X86::R10D, X86::R11D,
- X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP
- };
-
- GR32_NOSPClass::iterator
- GR32_NOSPClass::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (Subtarget.is64Bit())
- return X86_GR32_NOSP_AO_64;
- else
- return begin();
- }
-
- GR32_NOSPClass::iterator
- GR32_NOSPClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- if (Subtarget.is64Bit()) {
- // Does the function dedicate RBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate EBP.
- return array_endof(X86_GR32_NOSP_AO_64) - 1;
- else
- // If not, any reg in this class is ok.
- return array_endof(X86_GR32_NOSP_AO_64);
- } else {
- // Does the function dedicate EBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate EBP.
- return begin() + 6;
- else
- // If not, any reg in this class is ok.
- return begin() + 7;
- }
- }
- }];
}
// GR64_NOSP - GR64 registers except RSP (and RIP).
@@ -672,25 +461,14 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64,
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
(GR16 sub_16bit),
(GR32_NOSP sub_32bit)];
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- GR64_NOSPClass::iterator
- GR64_NOSPClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- if (!Subtarget.is64Bit())
- return begin(); // None of these are allocatable in 32-bit.
- // Does the function dedicate RBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- return end()-1; // If so, don't allocate RBP
- else
- return end(); // If not, any reg in this class is ok.
- }
- }];
+}
+
+// GR32_NOREX_NOSP - GR32 registers which do not require a REX prefix except
+// ESP.
+def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32,
+ [EAX, ECX, EDX, ESI, EDI, EBX, EBP]> {
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit)];
}
// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
@@ -698,26 +476,7 @@ def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, RBX, RBP]> {
let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
(GR16_NOREX sub_16bit),
- (GR32_NOREX sub_32bit)];
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- GR64_NOREX_NOSPClass::iterator
- GR64_NOREX_NOSPClass::allocation_order_end(const MachineFunction &MF) const
- {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- // Does the function dedicate RBP to being a frame ptr?
- if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate RBP.
- return end() - 1;
- else
- // If not, any reg in this class is ok.
- return end();
- }
- }];
+ (GR32_NOREX_NOSP sub_32bit)];
}
// A class to support the 'A' assembler constraint: EAX then EDX.
@@ -731,42 +490,12 @@ def GR32_AD : RegisterClass<"X86", [i32], 32, [EAX, EDX]> {
def FR32 : RegisterClass<"X86", [f32], 32,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11,
- XMM12, XMM13, XMM14, XMM15]> {
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- FR32Class::iterator
- FR32Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (!Subtarget.is64Bit())
- return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
- else
- return end();
- }
- }];
-}
+ XMM12, XMM13, XMM14, XMM15]>;
def FR64 : RegisterClass<"X86", [f64], 64,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11,
- XMM12, XMM13, XMM14, XMM15]> {
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- FR64Class::iterator
- FR64Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (!Subtarget.is64Bit())
- return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
- else
- return end();
- }
- }];
-}
+ XMM12, XMM13, XMM14, XMM15]>;
// FIXME: This sets up the floating point register files as though they are f64
@@ -784,15 +513,7 @@ def RFP80 : RegisterClass<"X86",[f80], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
// for transforming FPn allocations to STn registers)
def RST : RegisterClass<"X86", [f80, f64, f32], 32,
[ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7]> {
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- RSTClass::iterator
- RSTClass::allocation_order_end(const MachineFunction &MF) const {
- return begin();
- }
- }];
+ let isAllocatable = 0;
}
// Generic vector registers: VR64 and VR128.
@@ -803,21 +524,6 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
XMM8, XMM9, XMM10, XMM11,
XMM12, XMM13, XMM14, XMM15]> {
let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)];
-
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- VR128Class::iterator
- VR128Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (!Subtarget.is64Bit())
- return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
- else
- return end();
- }
- }];
}
def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256,
@@ -825,35 +531,10 @@ def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256,
YMM8, YMM9, YMM10, YMM11,
YMM12, YMM13, YMM14, YMM15]> {
let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)];
-
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- VR256Class::iterator
- VR256Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (!Subtarget.is64Bit())
- return end()-8; // Only YMM0 to YMM7 are available in 32-bit mode.
- else
- return end();
- }
- }];
}
// Status flags registers.
def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> {
let CopyCost = -1; // Don't allow copying of status registers.
-
- // EFLAGS is not allocatable.
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- CCRClass::iterator
- CCRClass::allocation_order_end(const MachineFunction &MF) const {
- return allocation_order_begin(MF);
- }
- }];
+ let isAllocatable = 0;
}
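
The X86RegisterInfo.td changes above delete the hand-written allocation_order_begin/allocation_order_end bodies; which registers may actually be used is instead driven by the reserved-register set (see the getReservedRegs() hunk) and, for RST and CCR, the new isAllocatable = 0 flag. The sketch below is a rough, invented illustration of that filtering idea, not the LLVM register allocator.

    #include <bitset>
    #include <cstdio>
    #include <vector>

    static std::vector<unsigned> allocationOrder(const std::vector<unsigned> &ClassRegs,
                                                 const std::bitset<64> &Reserved,
                                                 bool ClassIsAllocatable) {
      std::vector<unsigned> Order;
      if (!ClassIsAllocatable)            // e.g. RST or CCR after this patch
        return Order;
      for (unsigned R : ClassRegs)
        if (!Reserved.test(R))            // e.g. the stack pointer, or R8D-R15D in 32-bit mode
          Order.push_back(R);
      return Order;
    }

    int main() {
      std::vector<unsigned> ToyGR32 = {1, 2, 3, 4};  // invented register ids
      std::bitset<64> Reserved;
      Reserved.set(4);                               // pretend id 4 is the stack pointer
      std::printf("%zu allocatable registers\n",
                  allocationOrder(ToyGR32, Reserved, true).size());
      return 0;
    }
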
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
index ba5864e..481e821 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -265,6 +265,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
HasCLMUL = IsIntel && ((ECX >> 1) & 0x1);
HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
+ HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1);
HasAES = IsIntel && ((ECX >> 25) & 0x1);
if (IsIntel || IsAMD) {
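
The final hunk derives HasPOPCNT from bit 23 of ECX for CPUID leaf 1 (gated on IsIntel in the patch). A small standalone check of the same bit, using GCC/Clang's <cpuid.h>; this is illustrative code, not part of the patch, and it drops the vendor check.

    #include <cpuid.h>
    #include <cstdio>

    int main() {
      unsigned eax = 0, ebx = 0, ecx = 0, edx = 0;
      if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
        return 1;                           // CPUID leaf 1 not supported
      bool HasPOPCNT = (ecx >> 23) & 0x1;   // mirrors the new HasPOPCNT line
      std::printf("POPCNT: %s\n", HasPOPCNT ? "yes" : "no");
      return 0;
    }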