author     rdivacky <rdivacky@FreeBSD.org>    2010-01-15 15:37:28 +0000
committer  rdivacky <rdivacky@FreeBSD.org>    2010-01-15 15:37:28 +0000
commit     3fba7d16b41dfbefe3b1be6bc0ab94c017728f79 (patch)
tree       be5a687969f682edded4aa6f13594ffd9aa9030e /lib/Target/X86
parent     a16c51cee9225a354c999dd1076d5dba2aa79807 (diff)
Update LLVM to 93512.
Diffstat (limited to 'lib/Target/X86')
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp      36
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp    66
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.cpp   28
-rw-r--r--  lib/Target/X86/README-SSE.txt                  20
-rw-r--r--  lib/Target/X86/README.txt                      83
-rw-r--r--  lib/Target/X86/X86.td                           4
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp              20
-rw-r--r--  lib/Target/X86/X86FastISel.cpp                 16
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp            16
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp            157
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp            241
-rw-r--r--  lib/Target/X86/X86Instr64bit.td                94
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp                73
-rw-r--r--  lib/Target/X86/X86InstrInfo.h                  10
-rw-r--r--  lib/Target/X86/X86InstrInfo.td                 75
-rw-r--r--  lib/Target/X86/X86InstrSSE.td                  10
-rw-r--r--  lib/Target/X86/X86JITInfo.cpp                   2
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp              9
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp                 3
-rw-r--r--  lib/Target/X86/X86Subtarget.h                   7
20 files changed, 594 insertions, 376 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index c357b4d..c4ae5d2 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Target/TargetAsmParser.h"
#include "X86.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
@@ -15,6 +16,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParsedAsmOperand.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetAsmParser.h"
@@ -46,7 +48,7 @@ private:
/// @name Auto-generated Match Functions
/// {
- bool MatchInstruction(SmallVectorImpl<X86Operand> &Operands,
+ bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCInst &Inst);
/// MatchRegisterName - Match the given string to a register name, or 0 if
@@ -59,7 +61,8 @@ public:
X86ATTAsmParser(const Target &T, MCAsmParser &_Parser)
: TargetAsmParser(T), Parser(_Parser) {}
- virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst);
+ virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
virtual bool ParseDirective(AsmToken DirectiveID);
};
@@ -71,7 +74,7 @@ namespace {
/// X86Operand - Instances of this class represent a parsed X86 machine
/// instruction.
-struct X86Operand {
+struct X86Operand : public MCParsedAsmOperand {
enum {
Token,
Register,
@@ -400,10 +403,11 @@ bool X86ATTAsmParser::ParseMemOperand(X86Operand &Op) {
return false;
}
-bool X86ATTAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {
- SmallVector<X86Operand, 8> Operands;
+bool X86ATTAsmParser::
+ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- Operands.push_back(X86Operand::CreateToken(Name));
+ Operands.push_back(new X86Operand(X86Operand::CreateToken(Name)));
SMLoc Loc = getLexer().getTok().getLoc();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
@@ -411,31 +415,27 @@ bool X86ATTAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) {
// Parse '*' modifier.
if (getLexer().is(AsmToken::Star)) {
getLexer().Lex(); // Eat the star.
- Operands.push_back(X86Operand::CreateToken("*"));
+ Operands.push_back(new X86Operand(X86Operand::CreateToken("*")));
}
// Read the first operand.
- Operands.push_back(X86Operand());
- if (ParseOperand(Operands.back()))
+ X86Operand Op;
+ if (ParseOperand(Op))
return true;
+ Operands.push_back(new X86Operand(Op));
+
while (getLexer().is(AsmToken::Comma)) {
getLexer().Lex(); // Eat the comma.
// Parse and remember the operand.
- Operands.push_back(X86Operand());
- if (ParseOperand(Operands.back()))
+ if (ParseOperand(Op))
return true;
+ Operands.push_back(new X86Operand(Op));
}
}
- if (!MatchInstruction(Operands, Inst))
- return false;
-
- // FIXME: We should give nicer diagnostics about the exact failure.
-
- Error(Loc, "unrecognized instruction");
- return true;
+ return false;
}
bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
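Aside (a sketch, not part of the patch): the refactor above separates parsing
from matching — ParseInstruction now fills a caller-owned vector of
heap-allocated operands, and MatchInstruction runs later over that list. A
minimal C++ sketch of that shape, using hypothetical stand-in types rather
than the real MC classes:

#include <memory>
#include <string>
#include <vector>

struct ParsedOperand { std::string Text; };  // stand-in for MCParsedAsmOperand

// Mirrors the new MC convention: return false on success and append one
// operand object per parsed token, leaving instruction matching (and any
// per-operand diagnostics) to a later pass.
static bool parseInstruction(const std::string &Mnemonic,
                             std::vector<std::unique_ptr<ParsedOperand>> &Ops) {
  Ops.push_back(std::make_unique<ParsedOperand>(ParsedOperand{Mnemonic}));
  Ops.push_back(std::make_unique<ParsedOperand>(ParsedOperand{"%eax"}));
  return false;
}

int main() {
  std::vector<std::unique_ptr<ParsedOperand>> Ops;
  return parseInstruction("movl", Ops) ? 1 : 0;
}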
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index b88063f..70c6dd0 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -201,6 +201,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
/// jump tables, constant pools, global address and external symbols, all of
/// which print to a label with various suffixes for relocation types etc.
void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) {
+ SmallString<128> TempNameStr;
switch (MO.getType()) {
default: llvm_unreachable("unknown symbol type!");
case MachineOperand::MO_JumpTableIndex:
@@ -236,41 +237,38 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) {
if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
- SmallString<128> NameStr;
- Mang->getNameWithPrefix(NameStr, GV, true);
- NameStr += "$non_lazy_ptr";
- MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str());
+ Mang->getNameWithPrefix(TempNameStr, GV, true);
+ TempNameStr += "$non_lazy_ptr";
+ MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str());
const MCSymbol *&StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
if (StubSym == 0) {
- NameStr.clear();
- Mang->getNameWithPrefix(NameStr, GV, false);
- StubSym = OutContext.GetOrCreateSymbol(NameStr.str());
+ TempNameStr.clear();
+ Mang->getNameWithPrefix(TempNameStr, GV, false);
+ StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str());
}
} else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){
- SmallString<128> NameStr;
- Mang->getNameWithPrefix(NameStr, GV, true);
- NameStr += "$non_lazy_ptr";
- MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str());
+ Mang->getNameWithPrefix(TempNameStr, GV, true);
+ TempNameStr += "$non_lazy_ptr";
+ MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str());
const MCSymbol *&StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym);
if (StubSym == 0) {
- NameStr.clear();
- Mang->getNameWithPrefix(NameStr, GV, false);
- StubSym = OutContext.GetOrCreateSymbol(NameStr.str());
+ TempNameStr.clear();
+ Mang->getNameWithPrefix(TempNameStr, GV, false);
+ StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str());
}
} else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
- SmallString<128> NameStr;
- Mang->getNameWithPrefix(NameStr, GV, true);
- NameStr += "$stub";
- MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str());
+ Mang->getNameWithPrefix(TempNameStr, GV, true);
+ TempNameStr += "$stub";
+ MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str());
const MCSymbol *&StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
if (StubSym == 0) {
- NameStr.clear();
- Mang->getNameWithPrefix(NameStr, GV, false);
- StubSym = OutContext.GetOrCreateSymbol(NameStr.str());
+ TempNameStr.clear();
+ Mang->getNameWithPrefix(TempNameStr, GV, false);
+ StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str());
}
}
@@ -285,24 +283,32 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) {
break;
}
case MachineOperand::MO_ExternalSymbol: {
- std::string Name = Mang->makeNameProper(MO.getSymbolName());
+ const MCSymbol *SymToPrint;
if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
- Name += "$stub";
- MCSymbol *Sym = OutContext.GetOrCreateSymbol(StringRef(Name));
+ Mang->getNameWithPrefix(TempNameStr,
+ StringRef(MO.getSymbolName())+"$stub");
+ const MCSymbol *Sym = OutContext.GetOrCreateSymbol(TempNameStr.str());
const MCSymbol *&StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
if (StubSym == 0) {
- Name.erase(Name.end()-5, Name.end());
- StubSym = OutContext.GetOrCreateSymbol(StringRef(Name));
+ TempNameStr.erase(TempNameStr.end()-5, TempNameStr.end());
+ StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str());
}
+ SymToPrint = StubSym;
+ } else {
+ Mang->getNameWithPrefix(TempNameStr, MO.getSymbolName());
+ SymToPrint = OutContext.GetOrCreateSymbol(TempNameStr.str());
}
// If the name begins with a dollar-sign, enclose it in parens. We do this
// to avoid having it look like an integer immediate to the assembler.
- if (Name[0] == '$')
- O << '(' << Name << ')';
- else
- O << Name;
+ if (SymToPrint->getName()[0] != '$')
+ SymToPrint->print(O, MAI);
+ else {
+ O << '(';
+ SymToPrint->print(O, MAI);
+    O << ')';
+ }
break;
}
}
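Aside (a sketch, not part of the patch): the dollar-sign quoting rule the
printer implements. In AT&T syntax a leading '$' marks an immediate, so such
symbol names are parenthesized; a tiny standalone C++ sketch:

#include <iostream>
#include <string>

// AT&T-syntax quoting: "$foo" would parse as an immediate operand, so it is
// printed as "($foo)"; any other name is emitted as-is.
static void printSym(std::ostream &O, const std::string &Name) {
  if (!Name.empty() && Name[0] == '$')
    O << '(' << Name << ')';
  else
    O << Name;
}

int main() {
  printSym(std::cout, "$stub_sym");  // prints ($stub_sym)
  std::cout << '\n';
}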
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index 1015b69..9ee118c 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -25,6 +25,7 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Mangler.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/DebugInfo.h"
using namespace llvm;
@@ -399,6 +400,14 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(X86::MOVZX32rm16);
lower_subreg32(&OutMI, 0);
break;
+ case X86::MOV16r0:
+ OutMI.setOpcode(X86::MOV32r0);
+ lower_subreg32(&OutMI, 0);
+ break;
+ case X86::MOV64r0:
+ OutMI.setOpcode(X86::MOV32r0);
+ lower_subreg32(&OutMI, 0);
+ break;
}
}
@@ -412,6 +421,25 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
case TargetInstrInfo::GC_LABEL:
printLabel(MI);
return;
+ case TargetInstrInfo::DEBUG_VALUE: {
+ if (!VerboseAsm)
+ return;
+ O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIetc do not take const operands for some reason
+ DIVariable V((MDNode*)(MI->getOperand(2).getMetadata()));
+ O << V.getName();
+ O << " <- ";
+ if (MI->getOperand(0).getType()==MachineOperand::MO_Register)
+ printOperand(MI, 0);
+ else {
+ assert(MI->getOperand(0).getType()==MachineOperand::MO_Immediate);
+ int64_t imm = MI->getOperand(0).getImm();
+ O << '[' << ((imm<0) ? "EBP" : "ESP+") << imm << ']';
+ }
+ O << "+";
+ printOperand(MI, 1);
+ return;
+ }
case TargetInstrInfo::INLINEASM:
printInlineAsm(MI);
return;
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 71ad51c..0f3e44b 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -916,3 +916,23 @@ cheaper to do fld1 than load from a constant pool for example, so
"load, add 1.0, store" is better done in the fp stack, etc.
//===---------------------------------------------------------------------===//
+
+The X86 backend should be able to if-convert SSE comparisons like "ucomisd" to
+"cmpsd". For example, this code:
+
+double d1(double x) { return x == x ? x : x + x; }
+
+Compiles into:
+
+_d1:
+ ucomisd %xmm0, %xmm0
+ jnp LBB1_2
+ addsd %xmm0, %xmm0
+ ret
+LBB1_2:
+ ret
+
+Also, the 'ret's should be shared. This is PR6032.
+
+//===---------------------------------------------------------------------===//
+
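Aside (a sketch, not part of the patch): what the requested if-conversion
would compute. cmpsd produces an all-ones or all-zero 64-bit mask, so the
select becomes and/andn/or; a plain C++ sketch of the branchless form of d1
(x == x is false only for NaN):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

static double d1_branchless(double x) {
  uint64_t bx, bxx, mask, br;
  double xx = x + x, r;
  std::memcpy(&bx, &x, 8);
  std::memcpy(&bxx, &xx, 8);
  mask = (x == x) ? ~0ULL : 0ULL;    // cmpeqsd-style mask
  br = (mask & bx) | (~mask & bxx);  // andpd / andnpd / orpd
  std::memcpy(&r, &br, 8);
  return r;
}

int main() {
  assert(d1_branchless(3.5) == 3.5);
  assert(std::isnan(d1_branchless(std::nan(""))));
}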
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index afd9f53..aa7bb3d 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -530,7 +530,7 @@ We should inline lrintf and probably other libc functions.
//===---------------------------------------------------------------------===//
-Start using the flags more. For example, compile:
+Use the FLAGS values from arithmetic instructions more. For example, compile:
int add_zf(int *x, int y, int a, int b) {
if ((*x += y) == 0)
@@ -554,31 +554,8 @@ _add_zf:
movl %ecx, %eax
ret
-and:
-
-int add_zf(int *x, int y, int a, int b) {
- if ((*x + y) < 0)
- return a;
- else
- return b;
-}
-
-to:
-
-add_zf:
- addl (%rdi), %esi
- movl %edx, %eax
- cmovns %ecx, %eax
- ret
-
-instead of:
-
-_add_zf:
- addl (%rdi), %esi
- testl %esi, %esi
- cmovs %edx, %ecx
- movl %ecx, %eax
- ret
+As another example, compile function f2 in test/CodeGen/X86/cmp-test.ll
+without a test instruction.
//===---------------------------------------------------------------------===//
@@ -685,55 +662,6 @@ Though this probably isn't worth it.
//===---------------------------------------------------------------------===//
-We need to teach the codegen to convert two-address INC instructions to LEA
-when the flags are dead (likewise dec). For example, on X86-64, compile:
-
-int foo(int A, int B) {
- return A+1;
-}
-
-to:
-
-_foo:
- leal 1(%edi), %eax
- ret
-
-instead of:
-
-_foo:
- incl %edi
- movl %edi, %eax
- ret
-
-Another example is:
-
-;; X's live range extends beyond the shift, so the register allocator
-;; cannot coalesce it with Y. Because of this, a copy needs to be
-;; emitted before the shift to save the register value before it is
-;; clobbered. However, this copy is not needed if the register
-;; allocator turns the shift into an LEA. This also occurs for ADD.
-
-; Check that the shift gets turned into an LEA.
-; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
-; RUN: not grep {mov E.X, E.X}
-
-@G = external global i32 ; <i32*> [#uses=3]
-
-define i32 @test1(i32 %X, i32 %Y) {
- %Z = add i32 %X, %Y ; <i32> [#uses=1]
- volatile store i32 %Y, i32* @G
- volatile store i32 %Z, i32* @G
- ret i32 %X
-}
-
-define i32 @test2(i32 %X) {
- %Z = add i32 %X, 1 ; <i32> [#uses=1]
- volatile store i32 %Z, i32* @G
- ret i32 %X
-}
-
-//===---------------------------------------------------------------------===//
-
Sometimes it is better to codegen subtractions from a constant (e.g. 7-x) with
a neg instead of a sub instruction. Consider:
@@ -854,11 +782,6 @@ __Z11no_overflowjj:
//===---------------------------------------------------------------------===//
-Re-materialize MOV32r0 etc. with xor instead of changing them to moves if the
-condition register is dead. xor reg reg is shorter than mov reg, #0.
-
-//===---------------------------------------------------------------------===//
-
The following code:
bb114.preheader: ; preds = %cond_next94
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index a6e1ca3..7919559 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -23,6 +23,7 @@ include "llvm/Target/Target.td"
def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
"Enable conditional move instructions">;
+
def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
"Enable MMX instructions">;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
@@ -66,6 +67,9 @@ def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true",
"Enable three-operand fused multiple-add">;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
"Enable four-operand fused multiple-add">;
+def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
+ "HasVectorUAMem", "true",
+ "Allow unaligned memory operands on vector/SIMD instructions">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 4892e17..828e872 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -135,7 +135,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
IsPIC = TM.getRelocationModel() == Reloc::PIC_;
do {
- DEBUG(errs() << "JITTing function '"
+ DEBUG(dbgs() << "JITTing function '"
<< MF.getFunction()->getName() << "'\n");
MCE.startFunction(MF);
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
@@ -477,7 +477,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
const TargetInstrDesc *Desc) {
- DEBUG(errs() << MI);
+ DEBUG(dbgs() << MI);
MCE.processDebugLoc(MI.getDebugLoc(), true);
@@ -618,11 +618,11 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
const MachineOperand &MO = MI.getOperand(CurOp++);
- DEBUG(errs() << "RawFrm CurOp " << CurOp << "\n");
- DEBUG(errs() << "isMBB " << MO.isMBB() << "\n");
- DEBUG(errs() << "isGlobal " << MO.isGlobal() << "\n");
- DEBUG(errs() << "isSymbol " << MO.isSymbol() << "\n");
- DEBUG(errs() << "isImm " << MO.isImm() << "\n");
+ DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n");
+ DEBUG(dbgs() << "isMBB " << MO.isMBB() << "\n");
+ DEBUG(dbgs() << "isGlobal " << MO.isGlobal() << "\n");
+ DEBUG(dbgs() << "isSymbol " << MO.isSymbol() << "\n");
+ DEBUG(dbgs() << "isImm " << MO.isImm() << "\n");
if (MO.isMBB()) {
emitPCRelativeBlockAddress(MO.getMBB());
@@ -843,7 +843,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
if (!Desc->isVariadic() && CurOp != NumOps) {
#ifndef NDEBUG
- errs() << "Cannot encode all operands of: " << MI << "\n";
+ dbgs() << "Cannot encode all operands of: " << MI << "\n";
#endif
llvm_unreachable(0);
}
@@ -1082,9 +1082,9 @@ public:
}
if (!OK) {
- errs() << "couldn't convert inst '";
+ dbgs() << "couldn't convert inst '";
MI.dump();
- errs() << "' to machine instr:\n";
+ dbgs() << "' to machine instr:\n";
Instr->dump();
}
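Aside (a usage sketch, not part of the patch): the errs()-to-dbgs()
conversion running through this commit switches debug output to LLVM's
dedicated debug stream, which honors -debug/-debug-only and, unlike errs(),
can be circularly buffered via -debug-buffer-size. Against the LLVM headers
of this era ("x86-emitter" is a hypothetical pass name):

#define DEBUG_TYPE "x86-emitter"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Compiled out in -Asserts builds; printed only when -debug (or
// -debug-only=x86-emitter) is given on the command line.
static void noteFunction(const char *Name) {
  DEBUG(dbgs() << "JITting function '" << Name << "'\n");
}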
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 431c120..7e02d59 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -786,8 +786,8 @@ bool X86FastISel::X86SelectCmp(Instruction *I) {
bool X86FastISel::X86SelectZExt(Instruction *I) {
// Handle zero-extension from i1 to i8, which is common.
- if (I->getType() == Type::getInt8Ty(I->getContext()) &&
- I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext())) {
+ if (I->getType()->isInteger(8) &&
+ I->getOperand(0)->getType()->isInteger(1)) {
unsigned ResultReg = getRegForValue(I->getOperand(0));
if (ResultReg == 0) return false;
// Set the high bits to zero.
@@ -948,7 +948,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) {
bool X86FastISel::X86SelectShift(Instruction *I) {
unsigned CReg = 0, OpReg = 0, OpImm = 0;
const TargetRegisterClass *RC = NULL;
- if (I->getType() == Type::getInt8Ty(I->getContext())) {
+ if (I->getType()->isInteger(8)) {
CReg = X86::CL;
RC = &X86::GR8RegClass;
switch (I->getOpcode()) {
@@ -957,7 +957,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
default: return false;
}
- } else if (I->getType() == Type::getInt16Ty(I->getContext())) {
+ } else if (I->getType()->isInteger(16)) {
CReg = X86::CX;
RC = &X86::GR16RegClass;
switch (I->getOpcode()) {
@@ -966,7 +966,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
default: return false;
}
- } else if (I->getType() == Type::getInt32Ty(I->getContext())) {
+ } else if (I->getType()->isInteger(32)) {
CReg = X86::ECX;
RC = &X86::GR32RegClass;
switch (I->getOpcode()) {
@@ -975,7 +975,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) {
case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
default: return false;
}
- } else if (I->getType() == Type::getInt64Ty(I->getContext())) {
+ } else if (I->getType()->isInteger(64)) {
CReg = X86::RCX;
RC = &X86::GR64RegClass;
switch (I->getOpcode()) {
@@ -1230,8 +1230,8 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
CC != CallingConv::X86_FastCall)
return false;
- // On X86, -tailcallopt changes the fastcc ABI. FastISel doesn't
- // handle this for now.
+ // fastcc with -tailcallopt is intended to provide a guaranteed
+  // tail call optimization. FastISel doesn't know how to do that.
if (CC == CallingConv::Fast && PerformTailCallOpt)
return false;
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 044bd4b..503ac14 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -75,12 +75,12 @@ namespace {
unsigned StackTop; // The current top of the FP stack.
void dumpStack() const {
- errs() << "Stack contents:";
+ dbgs() << "Stack contents:";
for (unsigned i = 0; i != StackTop; ++i) {
- errs() << " FP" << Stack[i];
+ dbgs() << " FP" << Stack[i];
assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
}
- errs() << "\n";
+ dbgs() << "\n";
}
private:
/// isStackEmpty - Return true if the FP stack is empty.
@@ -246,7 +246,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
PrevMI = prior(I);
++NumFP; // Keep track of # of pseudo instrs
- DEBUG(errs() << "\nFPInst:\t" << *MI);
+ DEBUG(dbgs() << "\nFPInst:\t" << *MI);
// Get dead variables list now because the MI pointer may be deleted as part
// of processing!
@@ -273,7 +273,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
unsigned Reg = DeadRegs[i];
if (Reg >= X86::FP0 && Reg <= X86::FP6) {
- DEBUG(errs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
+ DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
freeStackSlotAfter(I, Reg-X86::FP0);
}
}
@@ -282,13 +282,13 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
DEBUG(
MachineBasicBlock::iterator PrevI(PrevMI);
if (I == PrevI) {
- errs() << "Just deleted pseudo instruction\n";
+ dbgs() << "Just deleted pseudo instruction\n";
} else {
MachineBasicBlock::iterator Start = I;
// Rewind to first instruction newly inserted.
while (Start != BB.begin() && prior(Start) != PrevI) --Start;
- errs() << "Inserted instructions:\n\t";
- Start->print(errs(), &MF.getTarget());
+ dbgs() << "Inserted instructions:\n\t";
+ Start->print(dbgs(), &MF.getTarget());
while (++Start != llvm::next(I)) {}
}
dumpStack();
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index cb82383..e2a53d1 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -113,37 +113,37 @@ namespace {
}
void dump() {
- errs() << "X86ISelAddressMode " << this << '\n';
- errs() << "Base.Reg ";
+ dbgs() << "X86ISelAddressMode " << this << '\n';
+ dbgs() << "Base.Reg ";
if (Base.Reg.getNode() != 0)
Base.Reg.getNode()->dump();
else
- errs() << "nul";
- errs() << " Base.FrameIndex " << Base.FrameIndex << '\n'
+ dbgs() << "nul";
+ dbgs() << " Base.FrameIndex " << Base.FrameIndex << '\n'
<< " Scale" << Scale << '\n'
<< "IndexReg ";
if (IndexReg.getNode() != 0)
IndexReg.getNode()->dump();
else
- errs() << "nul";
- errs() << " Disp " << Disp << '\n'
+ dbgs() << "nul";
+ dbgs() << " Disp " << Disp << '\n'
<< "GV ";
if (GV)
GV->dump();
else
- errs() << "nul";
- errs() << " CP ";
+ dbgs() << "nul";
+ dbgs() << " CP ";
if (CP)
CP->dump();
else
- errs() << "nul";
- errs() << '\n'
+ dbgs() << "nul";
+ dbgs() << '\n'
<< "ES ";
if (ES)
- errs() << ES;
+ dbgs() << ES;
else
- errs() << "nul";
- errs() << " JT" << JT << " Align" << Align << '\n';
+ dbgs() << "nul";
+ dbgs() << " JT" << JT << " Align" << Align << '\n';
}
};
}
@@ -190,7 +190,7 @@ namespace {
#include "X86GenDAGISel.inc"
private:
- SDNode *Select(SDValue N);
+ SDNode *Select(SDNode *N);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
@@ -201,19 +201,19 @@ namespace {
bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth);
bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
- bool SelectAddr(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp);
- bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
+ bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp);
- bool SelectScalarSSELoad(SDValue Op, SDValue Pred,
+ bool SelectScalarSSELoad(SDNode *Op, SDValue Pred,
SDValue N, SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment,
SDValue &InChain, SDValue &OutChain);
- bool TryFoldLoad(SDValue P, SDValue N,
+ bool TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment);
@@ -310,6 +310,11 @@ bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
if (U == Root)
switch (U->getOpcode()) {
default: break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::AND:
+ case X86ISD::XOR:
+ case X86ISD::OR:
case ISD::ADD:
case ISD::ADDC:
case ISD::ADDE:
@@ -675,12 +680,12 @@ void X86DAGToDAGISel::InstructionSelect() {
// Codegen the basic block.
#ifndef NDEBUG
- DEBUG(errs() << "===== Instruction selection begins:\n");
+ DEBUG(dbgs() << "===== Instruction selection begins:\n");
Indent = 0;
#endif
SelectRoot(*CurDAG);
#ifndef NDEBUG
- DEBUG(errs() << "===== Instruction selection ends:\n");
+ DEBUG(dbgs() << "===== Instruction selection ends:\n");
#endif
CurDAG->RemoveDeadNodes();
@@ -850,7 +855,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
bool is64Bit = Subtarget->is64Bit();
DebugLoc dl = N.getDebugLoc();
DEBUG({
- errs() << "MatchAddress: ";
+ dbgs() << "MatchAddress: ";
AM.dump();
});
// Limit recursion.
@@ -1268,7 +1273,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
/// SelectAddr - returns true if it is able pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
-bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
+bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
X86ISelAddressMode AM;
@@ -1291,7 +1296,7 @@ bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
/// match a load whose top elements are either undef or zeros. The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
-bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,
+bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred,
SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment,
@@ -1302,7 +1307,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,
if (ISD::isNON_EXTLoad(InChain.getNode()) &&
InChain.getValue(0).hasOneUse() &&
N.hasOneUse() &&
- IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) {
+ IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op)) {
LoadSDNode *LD = cast<LoadSDNode>(InChain);
if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
return false;
@@ -1333,7 +1338,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,
/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
-bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
+bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp) {
X86ISelAddressMode AM;
@@ -1395,10 +1400,10 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
}
/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
-bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
+bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp) {
- assert(Op.getOpcode() == X86ISD::TLSADDR);
+ assert(Op->getOpcode() == X86ISD::TLSADDR);
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
@@ -1421,13 +1426,13 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
}
-bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
+bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment) {
if (ISD::isNON_EXTLoad(N.getNode()) &&
N.hasOneUse() &&
- IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode()))
+ IsLegalAndProfitableToFold(N.getNode(), P, P))
return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
return false;
}
@@ -1454,7 +1459,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
SDValue In2L = Node->getOperand(2);
SDValue In2H = Node->getOperand(3);
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ if (!SelectAddr(In1.getNode(), In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
return NULL;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
@@ -1480,7 +1485,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
SDValue Ptr = Node->getOperand(1);
SDValue Val = Node->getOperand(2);
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ if (!SelectAddr(Ptr.getNode(), Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
return 0;
bool isInc = false, isDec = false, isSub = false, isCN = false;
@@ -1678,8 +1683,7 @@ static bool HasNoSignedComparisonUses(SDNode *N) {
return true;
}
-SDNode *X86DAGToDAGISel::Select(SDValue N) {
- SDNode *Node = N.getNode();
+SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
EVT NVT = Node->getValueType(0);
unsigned Opc, MOpc;
unsigned Opcode = Node->getOpcode();
@@ -1687,9 +1691,9 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
#ifndef NDEBUG
DEBUG({
- errs() << std::string(Indent, ' ') << "Selecting: ";
+ dbgs() << std::string(Indent, ' ') << "Selecting: ";
Node->dump(CurDAG);
- errs() << '\n';
+ dbgs() << '\n';
});
Indent += 2;
#endif
@@ -1697,9 +1701,9 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
if (Node->isMachineOpcode()) {
#ifndef NDEBUG
DEBUG({
- errs() << std::string(Indent-2, ' ') << "== ";
+ dbgs() << std::string(Indent-2, ' ') << "== ";
Node->dump(CurDAG);
- errs() << '\n';
+ dbgs() << '\n';
});
Indent -= 2;
#endif
@@ -1767,10 +1771,10 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
    // Multiply is commutative.
if (!foldedLoad) {
- foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
if (foldedLoad)
std::swap(N0, N1);
}
@@ -1793,21 +1797,21 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
}
// Copy the low half of the result, if it is needed.
- if (!N.getValue(0).use_empty()) {
+ if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
LoReg, NVT, InFlag);
InFlag = Result.getValue(2);
- ReplaceUses(N.getValue(0), Result);
+ ReplaceUses(SDValue(Node, 0), Result);
#ifndef NDEBUG
DEBUG({
- errs() << std::string(Indent-2, ' ') << "=> ";
+ dbgs() << std::string(Indent-2, ' ') << "=> ";
Result.getNode()->dump(CurDAG);
- errs() << '\n';
+ dbgs() << '\n';
});
#endif
}
// Copy the high half of the result, if it is needed.
- if (!N.getValue(1).use_empty()) {
+ if (!SDValue(Node, 1).use_empty()) {
SDValue Result;
if (HiReg == X86::AH && Subtarget->is64Bit()) {
// Prevent use of AH in a REX instruction by referencing AX instead.
@@ -1826,12 +1830,12 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
HiReg, NVT, InFlag);
InFlag = Result.getValue(2);
}
- ReplaceUses(N.getValue(1), Result);
+ ReplaceUses(SDValue(Node, 1), Result);
#ifndef NDEBUG
DEBUG({
- errs() << std::string(Indent-2, ' ') << "=> ";
+ dbgs() << std::string(Indent-2, ' ') << "=> ";
Result.getNode()->dump(CurDAG);
- errs() << '\n';
+ dbgs() << '\n';
});
#endif
}
@@ -1869,7 +1873,6 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
unsigned LoReg, HiReg, ClrReg;
unsigned ClrOpcode, SExtOpcode;
- EVT ClrVT = NVT;
switch (NVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8:
@@ -1879,7 +1882,7 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
break;
case MVT::i16:
LoReg = X86::AX; HiReg = X86::DX;
- ClrOpcode = X86::MOV32r0; ClrReg = X86::EDX; ClrVT = MVT::i32;
+ ClrOpcode = X86::MOV16r0; ClrReg = X86::DX;
SExtOpcode = X86::CWD;
break;
case MVT::i32:
@@ -1889,13 +1892,13 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
break;
case MVT::i64:
LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
- ClrOpcode = ~0U; // NOT USED.
+ ClrOpcode = X86::MOV64r0;
SExtOpcode = X86::CQO;
break;
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
bool signBitIsZero = CurDAG->SignBitIsZero(N0);
SDValue InFlag;
@@ -1903,7 +1906,7 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
// Special case for div8, just use a move with zero extension to AX to
// clear the upper 8 bits (AH).
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
- if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
Move =
SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16,
@@ -1928,24 +1931,8 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
} else {
// Zero out the high part, effectively zero extending the input.
- SDValue ClrNode;
-
- if (NVT.getSimpleVT() == MVT::i64) {
- ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32),
- 0);
- // We just did a 32-bit clear, insert it into a 64-bit register to
- // clear the whole 64-bit reg.
- SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64);
- SDValue SubRegNo =
- CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);
- ClrNode =
- SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl,
- MVT::i64, Zero, ClrNode, SubRegNo),
- 0);
- } else {
- ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, ClrVT), 0);
- }
-
+ SDValue ClrNode =
+ SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
ClrNode, InFlag).getValue(1);
}
@@ -1966,21 +1953,21 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
}
// Copy the division (low) result, if it is needed.
- if (!N.getValue(0).use_empty()) {
+ if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
LoReg, NVT, InFlag);
InFlag = Result.getValue(2);
- ReplaceUses(N.getValue(0), Result);
+ ReplaceUses(SDValue(Node, 0), Result);
#ifndef NDEBUG
DEBUG({
- errs() << std::string(Indent-2, ' ') << "=> ";
+ dbgs() << std::string(Indent-2, ' ') << "=> ";
Result.getNode()->dump(CurDAG);
- errs() << '\n';
+ dbgs() << '\n';
});
#endif
}
// Copy the remainder (high) result, if it is needed.
- if (!N.getValue(1).use_empty()) {
+ if (!SDValue(Node, 1).use_empty()) {
SDValue Result;
if (HiReg == X86::AH && Subtarget->is64Bit()) {
// Prevent use of AH in a REX instruction by referencing AX instead.
@@ -2000,12 +1987,12 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
HiReg, NVT, InFlag);
InFlag = Result.getValue(2);
}
- ReplaceUses(N.getValue(1), Result);
+ ReplaceUses(SDValue(Node, 1), Result);
#ifndef NDEBUG
DEBUG({
- errs() << std::string(Indent-2, ' ') << "=> ";
+ dbgs() << std::string(Indent-2, ' ') << "=> ";
Result.getNode()->dump(CurDAG);
- errs() << '\n';
+ dbgs() << '\n';
});
#endif
}
@@ -2124,16 +2111,16 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
}
}
- SDNode *ResNode = SelectCode(N);
+ SDNode *ResNode = SelectCode(Node);
#ifndef NDEBUG
DEBUG({
- errs() << std::string(Indent-2, ' ') << "=> ";
- if (ResNode == NULL || ResNode == N.getNode())
- N.getNode()->dump(CurDAG);
+ dbgs() << std::string(Indent-2, ' ') << "=> ";
+ if (ResNode == NULL || ResNode == Node)
+ Node->dump(CurDAG);
else
ResNode->dump(CurDAG);
- errs() << '\n';
+ dbgs() << '\n';
});
Indent -= 2;
#endif
@@ -2150,7 +2137,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
case 'v': // not offsetable ??
default: return true;
case 'm': // memory
- if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3, Op4))
+ if (!SelectAddr(Op.getNode(), Op, Op0, Op1, Op2, Op3, Op4))
return true;
break;
}
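Aside (a sketch, not part of the patch): why the divide lowering above needs
MOV16r0/MOV64r0 at all. x86 divide consumes a double-width dividend (DX:AX,
EDX:EAX, RDX:RAX); unsigned division must zero the high half, while signed
division sign-extends into it (CWD/CDQ/CQO). A plain C++ sketch of the two
set-ups for the 16-bit case:

#include <cassert>
#include <cstdint>

int main() {
  uint16_t ax = 50000, d = 7;
  // Unsigned: DX must be zero, i.e. the dividend is zero-extended
  // (the MOV16r0 case above).
  uint32_t dxax = (uint32_t)ax;
  assert(dxax / d == ax / d);

  int16_t sax = -12345, sd = 7;
  // Signed: CWD copies AX's sign bit through DX.
  int32_t sdxax = (int32_t)sax;
  assert(sdxax / sd == sax / sd);
}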
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index c722fbf..228ec9f 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -978,6 +978,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::MEMBARRIER);
setTargetDAGCombine(ISD::ZERO_EXTEND);
@@ -2077,10 +2078,10 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
assert(((Callee.getOpcode() == ISD::Register &&
(cast<RegisterSDNode>(Callee)->getReg() == X86::EAX ||
- cast<RegisterSDNode>(Callee)->getReg() == X86::R9)) ||
+ cast<RegisterSDNode>(Callee)->getReg() == X86::R11)) ||
Callee.getOpcode() == ISD::TargetExternalSymbol ||
Callee.getOpcode() == ISD::TargetGlobalAddress) &&
- "Expecting an global address, external symbol, or register");
+ "Expecting a global address, external symbol, or scratch register");
return DAG.getNode(X86ISD::TC_RETURN, dl,
NodeTys, &Ops[0], Ops.size());
@@ -5610,13 +5611,21 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
// because a TEST instruction will be better.
bool NonFlagUse = false;
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() != ISD::BRCOND &&
- UI->getOpcode() != ISD::SELECT &&
- UI->getOpcode() != ISD::SETCC) {
+ UE = Op.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ unsigned UOpNo = UI.getOperandNo();
+ if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
+        // Look past the truncate.
+ UOpNo = User->use_begin().getOperandNo();
+ User = *User->use_begin();
+ }
+ if (User->getOpcode() != ISD::BRCOND &&
+ User->getOpcode() != ISD::SETCC &&
+ (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
NonFlagUse = true;
break;
}
+ }
if (!NonFlagUse)
break;
}
@@ -5680,6 +5689,56 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
}
+/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
+/// if it's possible.
+static SDValue LowerToBT(SDValue Op0, ISD::CondCode CC,
+ DebugLoc dl, SelectionDAG &DAG) {
+ SDValue LHS, RHS;
+ if (Op0.getOperand(1).getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *Op010C =
+ dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0)))
+ if (Op010C->getZExtValue() == 1) {
+ LHS = Op0.getOperand(0);
+ RHS = Op0.getOperand(1).getOperand(1);
+ }
+ } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *Op000C =
+ dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0)))
+ if (Op000C->getZExtValue() == 1) {
+ LHS = Op0.getOperand(1);
+ RHS = Op0.getOperand(0).getOperand(1);
+ }
+ } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) {
+ ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1));
+ SDValue AndLHS = Op0.getOperand(0);
+ if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
+ LHS = AndLHS.getOperand(0);
+ RHS = AndLHS.getOperand(1);
+ }
+ }
+
+ if (LHS.getNode()) {
+ // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT
+ // instruction. Since the shift amount is in-range-or-undefined, we know
+ // that doing a bittest on the i16 value is ok. We extend to i32 because
+ // the encoding for the i16 version is larger than the i32 version.
+ if (LHS.getValueType() == MVT::i8)
+ LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
+
+ // If the operand types disagree, extend the shift amount to match. Since
+ // BT ignores high bits (like shifts) we can use anyextend.
+ if (LHS.getValueType() != RHS.getValueType())
+ RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
+
+ SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
+ unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(Cond, MVT::i8), BT);
+ }
+
+ return SDValue();
+}
+
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
SDValue Op0 = Op.getOperand(0);
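Aside (a sketch, not part of the patch): the shapes LowerToBT recognizes all
test a single bit, which is exactly what BT computes into CF (SETAE/SETB then
recover the ==0 / !=0 result). A plain C++ check of the equivalence:

#include <cassert>
#include <cstdint>

static bool viaAndShl(uint32_t X, unsigned N) {  // X & (1 << N)
  return (X & (1u << N)) != 0;
}
static bool viaShrAnd(uint32_t X, unsigned N) {  // (X >> N) & 1
  return ((X >> N) & 1u) != 0;
}

int main() {
  for (unsigned N = 0; N < 32; ++N)
    assert(viaAndShl(0xA5A50FF0u, N) == viaShrAnd(0xA5A50FF0u, N));
}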
@@ -5687,6 +5746,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ // Optimize to BT if possible.
// Lower (X & (1 << N)) == 0 to BT(X, N).
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
@@ -5695,48 +5755,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
Op1.getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(Op1)->getZExtValue() == 0 &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
- SDValue LHS, RHS;
- if (Op0.getOperand(1).getOpcode() == ISD::SHL) {
- if (ConstantSDNode *Op010C =
- dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0)))
- if (Op010C->getZExtValue() == 1) {
- LHS = Op0.getOperand(0);
- RHS = Op0.getOperand(1).getOperand(1);
- }
- } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) {
- if (ConstantSDNode *Op000C =
- dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0)))
- if (Op000C->getZExtValue() == 1) {
- LHS = Op0.getOperand(1);
- RHS = Op0.getOperand(0).getOperand(1);
- }
- } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) {
- ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1));
- SDValue AndLHS = Op0.getOperand(0);
- if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
- LHS = AndLHS.getOperand(0);
- RHS = AndLHS.getOperand(1);
- }
- }
-
- if (LHS.getNode()) {
- // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT
- // instruction. Since the shift amount is in-range-or-undefined, we know
- // that doing a bittest on the i16 value is ok. We extend to i32 because
- // the encoding for the i16 version is larger than the i32 version.
- if (LHS.getValueType() == MVT::i8)
- LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
-
- // If the operand types disagree, extend the shift amount to match. Since
- // BT ignores high bits (like shifts) we can use anyextend.
- if (LHS.getValueType() != RHS.getValueType())
- RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
-
- SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
- unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(Cond, MVT::i8), BT);
- }
+ SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
+ if (NewSetCC.getNode())
+ return NewSetCC;
}
bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
@@ -5936,6 +5957,23 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
}
if (addTest) {
+    // Look past the truncate.
+ if (Cond.getOpcode() == ISD::TRUNCATE)
+ Cond = Cond.getOperand(0);
+
+ // We know the result of AND is compared against zero. Try to match
+ // it to BT.
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
+ if (NewSetCC.getNode()) {
+ CC = NewSetCC.getOperand(0);
+ Cond = NewSetCC.getOperand(1);
+ addTest = false;
+ }
+ }
+ }
+
+ if (addTest) {
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Cond = EmitTest(Cond, X86::COND_NE, DAG);
}
@@ -6093,6 +6131,23 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
}
if (addTest) {
+    // Look past the truncate.
+ if (Cond.getOpcode() == ISD::TRUNCATE)
+ Cond = Cond.getOperand(0);
+
+ // We know the result of AND is compared against zero. Try to match
+ // it to BT.
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
+ if (NewSetCC.getNode()) {
+ CC = NewSetCC.getOperand(0);
+ Cond = NewSetCC.getOperand(1);
+ addTest = false;
+ }
+ }
+ }
+
+ if (addTest) {
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Cond = EmitTest(Cond, X86::COND_NE, DAG);
}
@@ -7524,8 +7579,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {
// x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
- return Ty1 == Type::getInt32Ty(Ty1->getContext()) &&
- Ty2 == Type::getInt64Ty(Ty1->getContext()) && Subtarget->is64Bit();
+  return Ty1->isInteger(32) && Ty2->isInteger(64) && Subtarget->is64Bit();
}
bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
@@ -7749,7 +7803,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
for (int i=0; i < 2 + X86AddrNumOperands; ++i)
argOpers[i] = &bInstr->getOperand(i+2);
- // x86 address has 4 operands: base, index, scale, and displacement
+ // x86 address has 5 operands: base, index, scale, displacement, and segment.
int lastAddrIndx = X86AddrNumOperands - 1; // [0,3]
unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
@@ -7777,14 +7831,16 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg())
.addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);
- unsigned tt1 = F->getRegInfo().createVirtualRegister(RC);
- unsigned tt2 = F->getRegInfo().createVirtualRegister(RC);
+ // The subsequent operations should be using the destination registers of
+  // the PHI instructions.
if (invSrc) {
- MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt1).addReg(t1);
- MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt2).addReg(t2);
+ t1 = F->getRegInfo().createVirtualRegister(RC);
+ t2 = F->getRegInfo().createVirtualRegister(RC);
+ MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t1).addReg(dest1Oper.getReg());
+ MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t2).addReg(dest2Oper.getReg());
} else {
- tt1 = t1;
- tt2 = t2;
+ t1 = dest1Oper.getReg();
+ t2 = dest2Oper.getReg();
}
int valArgIndx = lastAddrIndx + 1;
@@ -7798,7 +7854,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
else
MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5);
if (regOpcL != X86::MOV32rr)
- MIB.addReg(tt1);
+ MIB.addReg(t1);
(*MIB).addOperand(*argOpers[valArgIndx]);
assert(argOpers[valArgIndx + 1]->isReg() ==
argOpers[valArgIndx]->isReg());
@@ -7809,7 +7865,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
else
MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6);
if (regOpcH != X86::MOV32rr)
- MIB.addReg(tt2);
+ MIB.addReg(t2);
(*MIB).addOperand(*argOpers[valArgIndx + 1]);
MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX);
@@ -9108,6 +9164,64 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64 || !Subtarget->is64Bit())
+ return SDValue();
+
+ // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ SDValue ShAmt0 = N0.getOperand(1);
+ if (ShAmt0.getValueType() != MVT::i8)
+ return SDValue();
+ SDValue ShAmt1 = N1.getOperand(1);
+ if (ShAmt1.getValueType() != MVT::i8)
+ return SDValue();
+ if (ShAmt0.getOpcode() == ISD::TRUNCATE)
+ ShAmt0 = ShAmt0.getOperand(0);
+ if (ShAmt1.getOpcode() == ISD::TRUNCATE)
+ ShAmt1 = ShAmt1.getOperand(0);
+
+ DebugLoc DL = N->getDebugLoc();
+ unsigned Opc = X86ISD::SHLD;
+ SDValue Op0 = N0.getOperand(0);
+ SDValue Op1 = N1.getOperand(0);
+ if (ShAmt0.getOpcode() == ISD::SUB) {
+ Opc = X86ISD::SHRD;
+ std::swap(Op0, Op1);
+ std::swap(ShAmt0, ShAmt1);
+ }
+
+ if (ShAmt1.getOpcode() == ISD::SUB) {
+ SDValue Sum = ShAmt1.getOperand(0);
+ if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
+ if (SumC->getSExtValue() == 64 &&
+ ShAmt1.getOperand(1) == ShAmt0)
+ return DAG.getNode(Opc, DL, VT,
+ Op0, Op1,
+ DAG.getNode(ISD::TRUNCATE, DL,
+ MVT::i8, ShAmt0));
+ }
+ } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
+ ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
+ if (ShAmt0C &&
+ ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == 64)
+ return DAG.getNode(Opc, DL, VT,
+ N0.getOperand(0), N1.getOperand(0),
+ DAG.getNode(ISD::TRUNCATE, DL,
+ MVT::i8, ShAmt0));
+ }
+
+ return SDValue();
+}
+
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
@@ -9370,6 +9484,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
+ case ISD::OR: return PerformOrCombine(N, DAG, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
@@ -9423,7 +9538,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
std::string AsmStr = IA->getAsmString();
// TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
- std::vector<std::string> AsmPieces;
+ SmallVector<StringRef, 4> AsmPieces;
SplitString(AsmStr, AsmPieces, "\n"); // ; as separator?
switch (AsmPieces.size()) {
@@ -9445,7 +9560,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
return LowerToBSwap(CI);
}
// rorw $$8, ${0:w} --> llvm.bswap.i16
- if (CI->getType() == Type::getInt16Ty(CI->getContext()) &&
+ if (CI->getType()->isInteger(16) &&
AsmPieces.size() == 3 &&
AsmPieces[0] == "rorw" &&
AsmPieces[1] == "$$8," &&
@@ -9455,12 +9570,12 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
}
break;
case 3:
- if (CI->getType() == Type::getInt64Ty(CI->getContext()) &&
+ if (CI->getType()->isInteger(64) &&
Constraints.size() >= 2 &&
Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
// bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
- std::vector<std::string> Words;
+ SmallVector<StringRef, 4> Words;
SplitString(AsmPieces[0], Words, " \t");
if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
Words.clear();
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 65fbbda..08e1dd1 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1106,13 +1106,13 @@ def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst),
def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst),
(ins GR64:$src1, i64i8imm:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)),
+ (implicit EFLAGS)]>;
def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst),
(ins GR64:$src1, i64i32imm:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)),
+ (implicit EFLAGS)]>;
} // isTwoAddress
def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
@@ -1598,17 +1598,21 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
// Alias Instructions
//===----------------------------------------------------------------------===//
-// Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's
-// equivalent due to implicit zero-extending, and it sometimes has a smaller
-// encoding.
+// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
+// smaller encoding, but doing so at isel time interferes with rematerialization
+// in the current register allocator. For now, this is rewritten when the
+// instruction is lowered to an MCInst.
// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
// when we have a better way to specify isel priority.
-let AddedComplexity = 1 in
-def : Pat<(i64 0),
- (SUBREG_TO_REG (i64 0), (MOV32r0), x86_subreg_32bit)>;
-
-
-// Materialize i64 constant where top 32-bits are zero.
+let Defs = [EFLAGS],
+ AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins),
+ "",
+ [(set GR64:$dst, 0)]>;
+
+// Materialize i64 constant where the top 32 bits are zero. This could
+// theoretically use MOV32ri with a SUBREG_TO_REG to represent the
+// zero-extension; however, that would make it more difficult to rematerialize.
let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
"", [(set GR64:$dst, i64immZExt32:$src)]>;
@@ -1683,6 +1687,7 @@ def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
"cmpxchg16b\t$dst", []>, TB;
@@ -1962,6 +1967,17 @@ def : Pat<(add GR64:$src1, 0x0000000080000000),
def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst),
(SUB64mi32 addr:$dst, 0xffffffff80000000)>;
+// Use a 32-bit and with implicit zero-extension instead of a 64-bit and if it
+// has an immediate with at least 32 bits of leading zeros, to avoid needing to
+// materialize that immediate in a register first.
+def : Pat<(and GR64:$src, i64immZExt32:$imm),
+ (SUBREG_TO_REG
+ (i64 0),
+ (AND32ri
+ (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit),
+ imm:$imm),
+ x86_subreg_32bit)>;
+
// r & (2^32-1) ==> movz
def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
(MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
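Aside (a sketch, not part of the patch): why the new AND32ri pattern is
sound. If the 64-bit immediate zero-extends from 32 bits, the AND already
clears the top half of the result, which is exactly what the 32-bit
instruction's implicit zero-extension produces:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t src = 0xdeadbeefcafebabeULL;
  uint32_t imm = 0x00ff00ffu;                         // an i64immZExt32 value
  uint64_t wide   = src & (uint64_t)imm;              // 64-bit AND semantics
  uint64_t narrow = (uint64_t)((uint32_t)src & imm);  // AND32ri + implicit zext
  assert(wide == narrow);
}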
@@ -2028,7 +2044,7 @@ def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
x86_subreg_8bit_hi))>,
Requires<[In64BitMode]>;
-def : Pat<(srl_su GR16:$src, (i8 8)),
+def : Pat<(srl GR16:$src, (i8 8)),
(EXTRACT_SUBREG
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
@@ -2098,24 +2114,7 @@ def : Pat<(sra GR64:$src1, (and CL:$amt, 63)),
def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
(SAR64mCL addr:$dst)>;
-// (or (x >> c) | (y << (64 - c))) ==> (shrd64 x, y, c)
-def : Pat<(or (srl GR64:$src1, CL:$amt),
- (shl GR64:$src2, (sub 64, CL:$amt))),
- (SHRD64rrCL GR64:$src1, GR64:$src2)>;
-
-def : Pat<(store (or (srl (loadi64 addr:$dst), CL:$amt),
- (shl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
- (SHRD64mrCL addr:$dst, GR64:$src2)>;
-
-def : Pat<(or (srl GR64:$src1, (i8 (trunc RCX:$amt))),
- (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
- (SHRD64rrCL GR64:$src1, GR64:$src2)>;
-
-def : Pat<(store (or (srl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))),
- (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
- addr:$dst),
- (SHRD64mrCL addr:$dst, GR64:$src2)>;
-
+// Double shift patterns
def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
(SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
@@ -2123,24 +2122,6 @@ def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1),
GR64:$src2, (i8 imm:$amt2)), addr:$dst),
(SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
-// (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
-def : Pat<(or (shl GR64:$src1, CL:$amt),
- (srl GR64:$src2, (sub 64, CL:$amt))),
- (SHLD64rrCL GR64:$src1, GR64:$src2)>;
-
-def : Pat<(store (or (shl (loadi64 addr:$dst), CL:$amt),
- (srl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
- (SHLD64mrCL addr:$dst, GR64:$src2)>;
-
-def : Pat<(or (shl GR64:$src1, (i8 (trunc RCX:$amt))),
- (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
- (SHLD64rrCL GR64:$src1, GR64:$src2)>;
-
-def : Pat<(store (or (shl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))),
- (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
- addr:$dst),
- (SHLD64mrCL addr:$dst, GR64:$src2)>;
-
def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
(SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
@@ -2148,6 +2129,19 @@ def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1),
GR64:$src2, (i8 imm:$amt2)), addr:$dst),
(SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
+// (or x1, x2) -> (add x1, x2) if the two operands are known not to share bits.
+let AddedComplexity = 5 in { // Try this before selecting an OR.
+def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt8:$src2),
+ (implicit EFLAGS)),
+ (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt32:$src2),
+ (implicit EFLAGS)),
+ (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(parallel (or_is_add GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)),
+ (ADD64rr GR64:$src1, GR64:$src2)>;
+} // AddedComplexity
+
// X86 specific add which produces a flag.
def : Pat<(addc GR64:$src1, GR64:$src2),
(ADD64rr GR64:$src1, GR64:$src2)>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index e555cd1..7b39fb3 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
@@ -711,6 +712,62 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
}
}
+bool
+X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+ switch (MI.getOpcode()) {
+ default: break;
+ case X86::MOVSX16rr8:
+ case X86::MOVZX16rr8:
+ case X86::MOVSX32rr8:
+ case X86::MOVZX32rr8:
+ case X86::MOVSX64rr8:
+ case X86::MOVZX64rr8:
+ if (!TM.getSubtarget<X86Subtarget>().is64Bit())
+      // It's not always legal to reference the low 8 bits of the larger
+      // register in 32-bit mode.
+ return false;
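+    // In 64-bit mode, deliberately fall through to the common handling below.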
+ case X86::MOVSX32rr16:
+ case X86::MOVZX32rr16:
+ case X86::MOVSX64rr16:
+ case X86::MOVZX64rr16:
+ case X86::MOVSX64rr32:
+ case X86::MOVZX64rr32: {
+ if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
+ // Be conservative.
+ return false;
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable(0);
+ break;
+ case X86::MOVSX16rr8:
+ case X86::MOVZX16rr8:
+ case X86::MOVSX32rr8:
+ case X86::MOVZX32rr8:
+ case X86::MOVSX64rr8:
+ case X86::MOVZX64rr8:
+ SubIdx = 1;
+ break;
+ case X86::MOVSX32rr16:
+ case X86::MOVZX32rr16:
+ case X86::MOVSX64rr16:
+ case X86::MOVZX64rr16:
+ SubIdx = 3;
+ break;
+ case X86::MOVSX64rr32:
+ case X86::MOVZX64rr32:
+ SubIdx = 4;
+ break;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
/// isFrameOperand - Return true and the FrameIndex if the specified
/// operand and the following operands form a reference to the stack frame.
bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
@@ -1018,12 +1075,16 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
switch (Opc) {
default: break;
case X86::MOV8r0:
- case X86::MOV32r0: {
+ case X86::MOV16r0:
+ case X86::MOV32r0:
+ case X86::MOV64r0: {
if (!isSafeToClobberEFLAGS(MBB, I)) {
switch (Opc) {
default: break;
case X86::MOV8r0: Opc = X86::MOV8ri; break;
+ case X86::MOV16r0: Opc = X86::MOV16ri; break;
case X86::MOV32r0: Opc = X86::MOV32ri; break;
+ case X86::MOV64r0: Opc = X86::MOV64ri; break;
}
Clone = false;
}
@@ -2290,8 +2351,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
isTwoAddrFold = true;
} else if (i == 0) { // If operand 0
- if (MI->getOpcode() == X86::MOV32r0)
+ if (MI->getOpcode() == X86::MOV64r0)
+ NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV32r0)
NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV16r0)
+ NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
else if (MI->getOpcode() == X86::MOV8r0)
NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
if (NewMI)
@@ -2354,7 +2419,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// No fusion
if (PrintFailedFusing)
- errs() << "We failed to fuse operand " << i << " in " << *MI;
+ dbgs() << "We failed to fuse operand " << i << " in " << *MI;
return NULL;
}
@@ -2559,7 +2624,9 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
} else if (OpNum == 0) { // If operand 0
switch (Opc) {
case X86::MOV8r0:
+ case X86::MOV16r0:
case X86::MOV32r0:
+ case X86::MOV64r0:
return true;
default: break;
}
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index b83441d..0ab85f4 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -448,6 +448,16 @@ public:
unsigned &SrcReg, unsigned &DstReg,
unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+ /// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
+ /// extension instruction. That is, it's like a copy where it's legal for the
+  /// source to overlap the destination, e.g. X86::MOVSX64rr32. If this returns
+ /// true, then it's expected the pre-extension value is available as a subreg
+ /// of the result register. This also returns the sub-register index in
+ /// SubIdx.
+ virtual bool isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const;
+
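The property this hook asserts can be stated in plain C++: after a zero- (or sign-) extension, the low sub-register of the destination holds exactly the source value, so a coalescer may let the two share a register. A minimal runnable check of the zero-extension case (illustrative, standalone, outside the LLVM tree):

// coalescable_ext_demo.cpp - after a MOVZX64rr32-style widening, the 32-bit
// sub-register of the destination equals the source, which is what makes
// the extension "coalescable" like a plain copy.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t src = 0xDEADBEEF;
  uint64_t dst = static_cast<uint64_t>(src); // movzx %src32 -> %dst64
  assert(static_cast<uint32_t>(dst) == src); // sub_32bit of dst == src
  return 0;
}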
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
/// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination
/// stack locations as well. This uses a heuristic so it isn't
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 4d922a5..396cb53 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -160,15 +160,21 @@ def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
[SDNPHasChain, SDNPOptInFlag]>;
-def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags>;
+def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
-def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags>;
-def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags>;
+def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags,
+ [SDNPCommutative]>;
def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
-def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags>;
-def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags>;
-def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags>;
+def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
+def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
+ [SDNPCommutative]>;
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
@@ -487,6 +493,21 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
return N->hasOneUse();
}]>;
+// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.
+def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+ else {
+ unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero0, KnownOne0;
+ CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+ APInt KnownZero1, KnownOne1;
+ CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+ return (~KnownZero0 & ~KnownZero1) == 0;
+ }
+}]>;
+
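The PatFrag's compile-time check (no bit position may be one in both operands) is the known-bits analogue of a simple arithmetic identity: with disjoint operands there are no carries, so OR and ADD agree. A standalone sketch:

// or_is_add_demo.cpp - when (a & b) == 0, no column of the addition can
// carry, so (a | b) == (a + b); this is what lets the or_is_add patterns
// in this patch select an ADD for an OR node.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t a = 0xFF00FF00ULL, b = 0x00AB00CDULL; // disjoint bit masks
  assert((a & b) == 0);
  assert((a | b) == (a + b));
  return 0;
}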
// 'shld' and 'shrd' instruction patterns. Note that even though these have
// the srl and shl in their patterns, the C++ code must still check for them,
// because predicates are tested before children nodes are explored.
@@ -3700,18 +3721,21 @@ let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins),
"xor{b}\t$dst, $dst",
[(set GR8:$dst, 0)]>;
+
+// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
+// encoding and avoids a partial-register update sometimes, but doing so
+// at isel time interferes with rematerialization in the current register
+// allocator. For now, this is rewritten when the instruction is lowered
+// to an MCInst.
+def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
+ "",
+ [(set GR16:$dst, 0)]>, OpSize;
def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins),
"xor{l}\t$dst, $dst",
[(set GR32:$dst, 0)]>;
}
-// Use xorl instead of xorw since we don't care about the high 16 bits,
-// it's smaller, and it avoids a partial-register update.
-let AddedComplexity = 1 in
-def : Pat<(i16 0),
- (EXTRACT_SUBREG (MOV32r0), x86_subreg_16bit)>;
-
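The MOV16r0 rationale above reduces to one observation, checkable in plain C++: zeroing the full 32-bit register also zeroes its 16-bit sub-register, so the wider xor is a valid substitute with a smaller encoding and no partial-register update:

// mov16r0_demo.cpp - writing zero to the 32-bit register (xor %eax,%eax)
// leaves the 16-bit view (%ax) zero as well, which is why MOV16r0 can be
// rewritten as MOV32r0 on the corresponding super-register at lowering.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t eax = 0xFFFFFFFFu;
  eax = 0;                                 // xor %eax,%eax
  assert(static_cast<uint16_t>(eax) == 0); // %ax is zero too
  return 0;
}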
//===----------------------------------------------------------------------===//
// Thread Local Storage Instructions
//
@@ -3792,7 +3816,7 @@ def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
[(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
}
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in {
-def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i32mem:$ptr),
+def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
"lock\n\t"
"cmpxchg8b\t$ptr",
[(X86cas8 addr:$ptr)]>, TB, LOCK;
@@ -3858,6 +3882,7 @@ def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
"cmpxchg8b\t$dst", []>, TB;
@@ -4466,7 +4491,7 @@ def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
x86_subreg_8bit_hi)>,
Requires<[In32BitMode]>;
-def : Pat<(srl_su GR16:$src, (i8 8)),
+def : Pat<(srl GR16:$src, (i8 8)),
(EXTRACT_SUBREG
(MOVZX32rr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
@@ -4640,6 +4665,28 @@ def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C32r)>;
+// (or x1, x2) -> (add x1, x2) if the two operands are known not to share bits.
+let AddedComplexity = 5 in { // Try this before selecting an OR.
+def : Pat<(parallel (or_is_add GR16:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(parallel (or_is_add GR32:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(parallel (or_is_add GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (or_is_add GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(parallel (or_is_add GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)),
+ (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(parallel (or_is_add GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)),
+ (ADD32rr GR32:$src1, GR32:$src2)>;
+} // AddedComplexity
+
//===----------------------------------------------------------------------===//
// EFLAGS-defining Patterns
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index b26e508..94b9b55 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -131,11 +131,13 @@ def alignedloadv2i64 : PatFrag<(ops node:$ptr),
// Like 'load', but uses special alignment checks suitable for use in
// memory operands in most SSE instructions, which are required to
-// be naturally aligned on some targets but not on others.
-// FIXME: Actually implement support for targets that don't require the
-// alignment. This probably wants a subtarget predicate.
+// be naturally aligned on some targets but not on others. If the subtarget
+// allows unaligned accesses, match any load; enabling that may require
+// setting a feature bit in the processor (on startup, for example).
+// Opteron 10h and later processors implement such a feature.
def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return cast<LoadSDNode>(N)->getAlignment() >= 16;
+ return Subtarget->hasVectorUAMem()
+ || cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index c69cc83..f363903 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -348,7 +348,7 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
#endif
#if 0
- DEBUG(errs() << "In callback! Addr=" << (void*)RetAddr
+ DEBUG(dbgs() << "In callback! Addr=" << (void*)RetAddr
<< " ESP=" << (void*)StackPtr
<< ": Resolving call to function: "
<< TheVM->getFunctionReferencedName((void*)RetAddr) << "\n");
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index d96aafd..9bd96af 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -591,6 +591,15 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FrameIndex = MI.getOperand(i).getIndex();
unsigned BasePtr;
+  // DEBUG_VALUE has a special representation, and is only robust enough to
+  // represent SP (or BP) +/- offset addressing modes. We rewrite the
+  // FrameIndex to be a constant; implicitly, positive constants are relative
+  // to ESP and negative ones to EBP.
+  if (MI.getOpcode() == TargetInstrInfo::DEBUG_VALUE) {
+ MI.getOperand(i).ChangeToImmediate(getFrameIndexOffset(MF, FrameIndex));
+ return 0;
+ }
+
if (needsStackRealignment(MF))
BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
else
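The sign convention described in the comment above can be made concrete with a tiny decoder sketch; the helper name is hypothetical, only the ESP/EBP convention comes from the patch:

// debug_value_base_demo.cpp - after the rewrite, a DEBUG_VALUE offset
// implicitly selects its base register by sign: >= 0 means ESP-relative,
// < 0 means EBP-relative.
#include <cassert>
#include <cstring>

static const char *baseRegFor(long long offset) {
  return offset >= 0 ? "ESP" : "EBP";
}

int main() {
  assert(std::strcmp(baseRegFor(16), "ESP") == 0);
  assert(std::strcmp(baseRegFor(-8), "EBP") == 0);
  return 0;
}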
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 75cdbad..2039be7 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -286,6 +286,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
, HasFMA3(false)
, HasFMA4(false)
, IsBTMemSlow(false)
+ , HasVectorUAMem(false)
, DarwinVers(0)
, stackAlignment(8)
// FIXME: this is a known good value for Yonah. How about others?
@@ -317,7 +318,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
if (Is64Bit)
HasX86_64 = true;
- DEBUG(errs() << "Subtarget features: SSELevel " << X86SSELevel
+ DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
<< ", 3DNowLevel " << X863DNowLevel
<< ", 64bit " << HasX86_64 << "\n");
assert((!Is64Bit || HasX86_64) &&
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index ef6dbaf..618dd10 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -78,6 +78,10 @@ protected:
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
+  /// HasVectorUAMem - True if SIMD operations can have unaligned memory
+  /// operands. This may require setting a feature bit in the processor.
+ bool HasVectorUAMem;
+
/// DarwinVers - Nonzero if this is a darwin platform: the numeric
/// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
unsigned char DarwinVers; // Is any darwin-x86 platform.
@@ -142,6 +146,7 @@ public:
bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; }
bool isBTMemSlow() const { return IsBTMemSlow; }
+ bool hasVectorUAMem() const { return HasVectorUAMem; }
bool isTargetDarwin() const { return TargetType == isDarwin; }
bool isTargetELF() const { return TargetType == isELF; }
@@ -169,7 +174,7 @@ public:
p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64";
else if (isTargetDarwin())
p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32";
- else if (isTargetCygMing() || isTargetWindows())
+ else if (isTargetMingw() || isTargetWindows())
p = "e-p:32:32-f64:64:64-i64:64:64-f80:128:128-n8:16:32";
else
p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32";