author     rdivacky <rdivacky@FreeBSD.org>  2010-05-27 15:15:58 +0000
committer  rdivacky <rdivacky@FreeBSD.org>  2010-05-27 15:15:58 +0000
commit     1e3dec662ea18131c495db50caccc57f77b7a5fe
tree       9fad9a5d5dd8c4ff54af48edad9c8cc26dd5fda1
parent     377552607e51dc1d3e6ff33833f9620bcfe815ac
Update LLVM to r104832.
Diffstat (limited to 'lib/Target/X86')
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp                    199
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp                   94
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.h                      2
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.cpp                 139
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.h                     2
-rw-r--r--  lib/Target/X86/CMakeLists.txt                                  7
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp               61
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.c           8
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.h          33
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h     3
-rw-r--r--  lib/Target/X86/SSEDomainFix.cpp                                4
-rw-r--r--  lib/Target/X86/X86AsmBackend.cpp                              86
-rw-r--r--  lib/Target/X86/X86COFFMachineModuleInfo.h                     14
-rw-r--r--  lib/Target/X86/X86CallingConv.td                              14
-rw-r--r--  lib/Target/X86/X86FastISel.cpp                                27
-rw-r--r--  lib/Target/X86/X86FloatingPointRegKill.cpp                   110
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp                            12
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp                           370
-rw-r--r--  lib/Target/X86/X86ISelLowering.h                              20
-rw-r--r--  lib/Target/X86/X86Instr64bit.td                              155
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp                               65
-rw-r--r--  lib/Target/X86/X86InstrInfo.h                                 15
-rw-r--r--  lib/Target/X86/X86InstrInfo.td                               174
-rw-r--r--  lib/Target/X86/X86InstrMMX.td                                 11
-rw-r--r--  lib/Target/X86/X86InstrSSE.td                                162
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp                            95
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h                              10
-rw-r--r--  lib/Target/X86/X86RegisterInfo.td                            197
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.cpp                       225
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.h                          32
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp                           32
-rw-r--r--  lib/Target/X86/X86TargetMachine.h                              5
32 files changed, 1532 insertions(+), 851 deletions(-)
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 6b403c1..40a6a7b 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -186,20 +186,73 @@ struct X86Operand : public MCParsedAsmOperand {
bool isImm() const { return Kind == Immediate; }
- bool isImmSExt8() const {
- // Accept immediates which fit in 8 bits when sign extended, and
- // non-absolute immediates.
+ bool isImmSExti16i8() const {
if (!isImm())
return false;
- if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) {
- int64_t Value = CE->getValue();
- return Value == (int64_t) (int8_t) Value;
- }
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
- return true;
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000000000007FULL)||
+ (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
-
+ bool isImmSExti32i8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000000000007FULL)||
+ (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+ bool isImmSExti64i8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000000000007FULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+ bool isImmSExti64i32() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000007FFFFFFFULL)||
+ (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+
bool isMem() const { return Kind == Memory; }
bool isAbsMem() const {
@@ -231,12 +284,6 @@ struct X86Operand : public MCParsedAsmOperand {
addExpr(Inst, getImm());
}
- void addImmSExt8Operands(MCInst &Inst, unsigned N) const {
- // FIXME: Support user customization of the render method.
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
void addMemOperands(MCInst &Inst, unsigned N) const {
assert((N == 5) && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
@@ -535,6 +582,21 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
bool X86ATTAsmParser::
ParseInstruction(const StringRef &Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // The various flavors of pushf and popf use Requires<In32BitMode> and
+ // Requires<In64BitMode>, but the assembler doesn't yet implement that.
+ // For now, just do a manual check to prevent silent misencoding.
+ if (Is64Bit) {
+ if (Name == "popfl")
+ return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
+ else if (Name == "pushfl")
+ return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
+ } else {
+ if (Name == "popfq")
+ return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
+ else if (Name == "pushfq")
+ return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode");
+ }
+
// FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
// represent alternative syntaxes in the .td file, without requiring
// instruction duplication.
@@ -547,9 +609,66 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
.Case("repe", "rep")
.Case("repz", "rep")
.Case("repnz", "repne")
+ .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
+ .Case("popf", Is64Bit ? "popfq" : "popfl")
+ .Case("retl", Is64Bit ? "retl" : "ret")
+ .Case("retq", Is64Bit ? "ret" : "retq")
+ .Case("setz", "sete")
+ .Case("setnz", "setne")
+ .Case("jz", "je")
+ .Case("jnz", "jne")
+ .Case("cmovcl", "cmovbl")
+ .Case("cmovcl", "cmovbl")
+ .Case("cmovnal", "cmovbel")
+ .Case("cmovnbl", "cmovael")
+ .Case("cmovnbel", "cmoval")
+ .Case("cmovncl", "cmovael")
+ .Case("cmovngl", "cmovlel")
+ .Case("cmovnl", "cmovgel")
+ .Case("cmovngl", "cmovlel")
+ .Case("cmovngel", "cmovll")
+ .Case("cmovnll", "cmovgel")
+ .Case("cmovnlel", "cmovgl")
+ .Case("cmovnzl", "cmovnel")
+ .Case("cmovzl", "cmovel")
.Default(Name);
+
+ // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
+ const MCExpr *ExtraImmOp = 0;
+ if (PatchedName.startswith("cmp") &&
+ (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
+ PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
+ unsigned SSEComparisonCode = StringSwitch<unsigned>(
+ PatchedName.slice(3, PatchedName.size() - 2))
+ .Case("eq", 0)
+ .Case("lt", 1)
+ .Case("le", 2)
+ .Case("unord", 3)
+ .Case("neq", 4)
+ .Case("nlt", 5)
+ .Case("nle", 6)
+ .Case("ord", 7)
+ .Default(~0U);
+ if (SSEComparisonCode != ~0U) {
+ ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
+ getParser().getContext());
+ if (PatchedName.endswith("ss")) {
+ PatchedName = "cmpss";
+ } else if (PatchedName.endswith("sd")) {
+ PatchedName = "cmpsd";
+ } else if (PatchedName.endswith("ps")) {
+ PatchedName = "cmpps";
+ } else {
+ assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
+ PatchedName = "cmppd";
+ }
+ }
+ }
Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
+ if (ExtraImmOp)
+ Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
+
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Parse '*' modifier.
@@ -564,7 +683,7 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
Operands.push_back(Op);
else
return true;
-
+
while (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
@@ -587,6 +706,17 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
Operands.erase(Operands.begin() + 1);
}
+ // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
+ // "f{mul*,add*,sub*,div*} $op"
+ if ((Name.startswith("fmul") || Name.startswith("fadd") ||
+ Name.startswith("fsub") || Name.startswith("fdiv")) &&
+ Operands.size() == 3 &&
+ static_cast<X86Operand*>(Operands[2])->isReg() &&
+ static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
+ delete Operands[2];
+ Operands.erase(Operands.begin() + 2);
+ }
+
return false;
}
@@ -622,6 +752,31 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
+/// LowerMOffset - Lower an 'moffset' form of an instruction, which just has an
+/// imm operand, to having "rm" or "mr" operands with the offset in the disp
+/// field.
+static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo,
+ bool isMR) {
+ MCOperand Disp = Inst.getOperand(0);
+
+ // Start over with an empty instruction.
+ Inst = MCInst();
+ Inst.setOpcode(Opc);
+
+ if (!isMR)
+ Inst.addOperand(MCOperand::CreateReg(RegNo));
+
+ // Add the mem operand.
+ Inst.addOperand(MCOperand::CreateReg(0)); // Segment
+ Inst.addOperand(MCOperand::CreateImm(1)); // Scale
+ Inst.addOperand(MCOperand::CreateReg(0)); // IndexReg
+ Inst.addOperand(Disp); // Displacement
+ Inst.addOperand(MCOperand::CreateReg(0)); // BaseReg
+
+ if (isMR)
+ Inst.addOperand(MCOperand::CreateReg(RegNo));
+}
+
// FIXME: Custom X86 cleanup function to implement a temporary hack to handle
// matching INCL/DECL correctly for x86_64. This needs to be replaced by a
// proper mechanism for supporting (ambiguous) feature dependent instructions.
@@ -637,6 +792,14 @@ void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) {
case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break;
case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break;
case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break;
+
+ // moffset instructions are x86-32 only.
+ case X86::MOV8o8a: LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break;
+ case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break;
+ case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break;
+ case X86::MOV8ao8: LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break;
+ case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break;
+ case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break;
}
}
@@ -673,6 +836,8 @@ X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
bool MatchW = MatchInstructionImpl(Operands, Inst);
Tmp[Base.size()] = 'l';
bool MatchL = MatchInstructionImpl(Operands, Inst);
+ Tmp[Base.size()] = 'q';
+ bool MatchQ = MatchInstructionImpl(Operands, Inst);
// Restore the old token.
Op->setTokenValue(Base);
@@ -680,7 +845,7 @@ X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
// If exactly one matched, then we treat that as a successful match (and the
// instruction will already have been filled in correctly, since the failing
// matches won't have modified it).
- if (MatchB + MatchW + MatchL == 2)
+ if (MatchB + MatchW + MatchL + MatchQ == 3)
return false;
// Otherwise, the match failed.
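
The new isImmSExti64i32 predicate accepts exactly those 64-bit immediates that survive a round trip through a sign-extended 32-bit field, which is what the two explicit ranges above spell out. A standalone sketch of the same test (fitsInSExt32 is a hypothetical helper, not part of the patch):

    #include <cstdint>

    // Encodable as a sign-extended imm32 iff truncating to int32_t and
    // sign-extending back reproduces the original value; equivalent to the
    // ranges [0, 0x7FFFFFFF] and [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF].
    static bool fitsInSExt32(uint64_t Value) {
      return Value == (uint64_t)(int64_t)(int32_t)Value;
    }

The i16i8, i32i8, and i64i8 variants are the same idea with an 8-bit inner truncation and the corresponding outer width.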
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index 8b0ed1c..183213d 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -58,12 +58,11 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SetupMachineFunction(MF);
if (Subtarget->isTargetCOFF()) {
- const Function *F = MF.getFunction();
- OutStreamer.EmitRawText("\t.def\t " + Twine(CurrentFnSym->getName()) +
- ";\t.scl\t" +
- Twine(F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT) +
- ";\t.type\t" + Twine(COFF::DT_FCN << COFF::N_BTSHFT)
- + ";\t.endef");
+ bool Intrn = MF.getFunction()->hasInternalLinkage();
+ OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::C_STAT : COFF::C_EXT);
+ OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT);
+ OutStreamer.EndCOFFSymbolDef();
}
// Have common code print out the function header with linkage info etc.
@@ -571,44 +570,55 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
// Emit type information for external functions
- for (X86COFFMachineModuleInfo::stub_iterator I = COFFMMI.stub_begin(),
- E = COFFMMI.stub_end(); I != E; ++I) {
- OutStreamer.EmitRawText("\t.def\t " + Twine(I->getKeyData()) +
- ";\t.scl\t" + Twine(COFF::C_EXT) +
- ";\t.type\t" +
- Twine(COFF::DT_FCN << COFF::N_BTSHFT) +
- ";\t.endef");
+ typedef X86COFFMachineModuleInfo::externals_iterator externals_iterator;
+ for (externals_iterator I = COFFMMI.externals_begin(),
+ E = COFFMMI.externals_end();
+ I != E; ++I) {
+ OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer.EmitCOFFSymbolStorageClass(COFF::C_EXT);
+ OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT);
+ OutStreamer.EndCOFFSymbolDef();
}
- if (Subtarget->isTargetCygMing()) {
- // Necessary for dllexport support
- std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
-
- const TargetLoweringObjectFileCOFF &TLOFCOFF =
- static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
-
- for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (I->hasDLLExportLinkage())
- DLLExportedFns.push_back(Mang->getSymbol(I));
-
- for (Module::const_global_iterator I = M.global_begin(),
- E = M.global_end(); I != E; ++I)
- if (I->hasDLLExportLinkage())
- DLLExportedGlobals.push_back(Mang->getSymbol(I));
-
- // Output linker support code for dllexported globals on windows.
- if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
- OutStreamer.SwitchSection(TLOFCOFF.getCOFFSection(".section .drectve",
- true,
- SectionKind::getMetadata()));
- for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i)
- OutStreamer.EmitRawText("\t.ascii \" -export:" +
- Twine(DLLExportedGlobals[i]->getName()) +
- ",data\"");
-
- for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i)
- OutStreamer.EmitRawText("\t.ascii \" -export:" +
- Twine(DLLExportedFns[i]->getName()) + "\"");
+ // Necessary for dllexport support
+ std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
+
+ const TargetLoweringObjectFileCOFF &TLOFCOFF =
+ static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
+
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->hasDLLExportLinkage())
+ DLLExportedFns.push_back(Mang->getSymbol(I));
+
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I)
+ if (I->hasDLLExportLinkage())
+ DLLExportedGlobals.push_back(Mang->getSymbol(I));
+
+ // Output linker support code for dllexported globals on windows.
+ if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
+ OutStreamer.SwitchSection(TLOFCOFF.getDrectveSection());
+ SmallString<128> name;
+ for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) {
+ if (Subtarget->isTargetWindows())
+ name = " /EXPORT:";
+ else
+ name = " -export:";
+ name += DLLExportedGlobals[i]->getName();
+ if (Subtarget->isTargetWindows())
+ name += ",DATA";
+ else
+ name += ",data";
+ OutStreamer.EmitBytes(name, 0);
+ }
+
+ for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) {
+ if (Subtarget->isTargetWindows())
+ name = " /EXPORT:";
+ else
+ name = " -export:";
+ name += DLLExportedFns[i]->getName();
+ OutStreamer.EmitBytes(name, 0);
}
}
}
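
The rewritten dllexport block now distinguishes linker dialects: MSVC link.exe consumes " /EXPORT:sym,DATA" directives from the .drectve section, while GNU ld and dlltool expect " -export:sym,data". A sketch of the string built in the loops above (exportDirective is a hypothetical standalone helper):

    #include <string>

    static std::string exportDirective(const std::string &Sym, bool IsData,
                                       bool IsWindows) {
      std::string S = IsWindows ? " /EXPORT:" : " -export:"; // link.exe vs. ld
      S += Sym;
      if (IsData)
        S += IsWindows ? ",DATA" : ",data";
      return S;
    }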
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
index 95984b2..b5a7f8d 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
@@ -31,7 +31,7 @@ class MCInst;
class MCStreamer;
class MCSymbol;
-class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter {
+class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
const X86Subtarget *Subtarget;
public:
explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
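
VISIBILITY_HIDDEN gives way to LLVM_LIBRARY_VISIBILITY here and in X86MCInstLower.h below. On GCC-compatible toolchains the macro amounts to ELF hidden visibility; a sketch of its shape (the real definition lives in llvm/Support/Compiler.h and carries more platform guards):

    #if defined(__GNUC__) && !defined(__MINGW32__) && !defined(__CYGWIN__)
    #define LLVM_LIBRARY_VISIBILITY __attribute__((visibility("hidden")))
    #else
    #define LLVM_LIBRARY_VISIBILITY
    #endif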
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index effc8ed..4edeca9 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -224,6 +224,60 @@ static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) {
OutMI.addOperand(OutMI.getOperand(0));
}
+/// \brief Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instructions with
+/// a short fixed-register form.
+static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
+ unsigned ImmOp = Inst.getNumOperands() - 1;
+ assert(Inst.getOperand(0).isReg() && Inst.getOperand(ImmOp).isImm() &&
+ ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
+ Inst.getNumOperands() == 2) && "Unexpected instruction!");
+
+ // Check whether the destination register can be fixed.
+ unsigned Reg = Inst.getOperand(0).getReg();
+ if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
+ return;
+
+ // If so, rewrite the instruction.
+ MCOperand Saved = Inst.getOperand(ImmOp);
+ Inst = MCInst();
+ Inst.setOpcode(Opcode);
+ Inst.addOperand(Saved);
+}
+
+/// \brief Simplify things like MOV32rm to MOV32o32a.
+static void SimplifyShortMoveForm(MCInst &Inst, unsigned Opcode) {
+ bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
+ unsigned AddrBase = IsStore;
+ unsigned RegOp = IsStore ? 0 : 5;
+ unsigned AddrOp = AddrBase + 3;
+ assert(Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
+ Inst.getOperand(AddrBase + 0).isReg() && // base
+ Inst.getOperand(AddrBase + 1).isImm() && // scale
+ Inst.getOperand(AddrBase + 2).isReg() && // index register
+ (Inst.getOperand(AddrOp).isExpr() || // address
+ Inst.getOperand(AddrOp).isImm())&&
+ Inst.getOperand(AddrBase + 4).isReg() && // segment
+ "Unexpected instruction!");
+
+ // Check whether the destination register can be fixed.
+ unsigned Reg = Inst.getOperand(RegOp).getReg();
+ if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
+ return;
+
+ // Check whether this is an absolute address.
+ if (Inst.getOperand(AddrBase + 0).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 2).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 4).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 1).getImm() != 1)
+ return;
+
+ // If so, rewrite the instruction.
+ MCOperand Saved = Inst.getOperand(AddrOp);
+ Inst = MCInst();
+ Inst.setOpcode(Opcode);
+ Inst.addOperand(Saved);
+}
void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
@@ -309,8 +363,32 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV64r0 -> MOV32r0
LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
break;
-
-
+
+ // TAILJMPr, TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have
+ // register inputs modeled as normal uses instead of implicit uses. As such,
+ // truncate off all but the first operand (the callee). FIXME: Change isel.
+ case X86::TAILJMPr:
+ case X86::TAILJMPr64:
+ case X86::CALL64r:
+ case X86::CALL64pcrel32: {
+ unsigned Opcode = OutMI.getOpcode();
+ MCOperand Saved = OutMI.getOperand(0);
+ OutMI = MCInst();
+ OutMI.setOpcode(Opcode);
+ OutMI.addOperand(Saved);
+ break;
+ }
+
+ // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64: {
+ MCOperand Saved = OutMI.getOperand(0);
+ OutMI = MCInst();
+ OutMI.setOpcode(X86::TAILJMP_1);
+ OutMI.addOperand(Saved);
+ break;
+ }
+
// The assembler backend wants to see branches in their small form and relax
// them to their large form. The JIT can only handle the large form because
// it does not do relaxation. For now, translate the large form to the
@@ -332,6 +410,61 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case X86::JGE_4: OutMI.setOpcode(X86::JGE_1); break;
case X86::JLE_4: OutMI.setOpcode(X86::JLE_1); break;
case X86::JG_4: OutMI.setOpcode(X86::JG_1); break;
+
+ // We don't currently select the correct instruction form for instructions
+ // which have a short %eax, etc. form. Handle this by custom lowering, for
+ // now.
+ //
+ // Note, we are currently not handling the following instructions:
+ // MOV64ao8, MOV64o8a
+ // XCHG16ar, XCHG32ar, XCHG64ar
+ case X86::MOV8mr_NOREX:
+ case X86::MOV8mr: SimplifyShortMoveForm(OutMI, X86::MOV8ao8); break;
+ case X86::MOV8rm_NOREX:
+ case X86::MOV8rm: SimplifyShortMoveForm(OutMI, X86::MOV8o8a); break;
+ case X86::MOV16mr: SimplifyShortMoveForm(OutMI, X86::MOV16ao16); break;
+ case X86::MOV16rm: SimplifyShortMoveForm(OutMI, X86::MOV16o16a); break;
+ case X86::MOV32mr: SimplifyShortMoveForm(OutMI, X86::MOV32ao32); break;
+ case X86::MOV32rm: SimplifyShortMoveForm(OutMI, X86::MOV32o32a); break;
+ case X86::MOV64mr: SimplifyShortMoveForm(OutMI, X86::MOV64ao64); break;
+ case X86::MOV64rm: SimplifyShortMoveForm(OutMI, X86::MOV64o64a); break;
+
+ case X86::ADC8ri: SimplifyShortImmForm(OutMI, X86::ADC8i8); break;
+ case X86::ADC16ri: SimplifyShortImmForm(OutMI, X86::ADC16i16); break;
+ case X86::ADC32ri: SimplifyShortImmForm(OutMI, X86::ADC32i32); break;
+ case X86::ADC64ri32: SimplifyShortImmForm(OutMI, X86::ADC64i32); break;
+ case X86::ADD8ri: SimplifyShortImmForm(OutMI, X86::ADD8i8); break;
+ case X86::ADD16ri: SimplifyShortImmForm(OutMI, X86::ADD16i16); break;
+ case X86::ADD32ri: SimplifyShortImmForm(OutMI, X86::ADD32i32); break;
+ case X86::ADD64ri32: SimplifyShortImmForm(OutMI, X86::ADD64i32); break;
+ case X86::AND8ri: SimplifyShortImmForm(OutMI, X86::AND8i8); break;
+ case X86::AND16ri: SimplifyShortImmForm(OutMI, X86::AND16i16); break;
+ case X86::AND32ri: SimplifyShortImmForm(OutMI, X86::AND32i32); break;
+ case X86::AND64ri32: SimplifyShortImmForm(OutMI, X86::AND64i32); break;
+ case X86::CMP8ri: SimplifyShortImmForm(OutMI, X86::CMP8i8); break;
+ case X86::CMP16ri: SimplifyShortImmForm(OutMI, X86::CMP16i16); break;
+ case X86::CMP32ri: SimplifyShortImmForm(OutMI, X86::CMP32i32); break;
+ case X86::CMP64ri32: SimplifyShortImmForm(OutMI, X86::CMP64i32); break;
+ case X86::OR8ri: SimplifyShortImmForm(OutMI, X86::OR8i8); break;
+ case X86::OR16ri: SimplifyShortImmForm(OutMI, X86::OR16i16); break;
+ case X86::OR32ri: SimplifyShortImmForm(OutMI, X86::OR32i32); break;
+ case X86::OR64ri32: SimplifyShortImmForm(OutMI, X86::OR64i32); break;
+ case X86::SBB8ri: SimplifyShortImmForm(OutMI, X86::SBB8i8); break;
+ case X86::SBB16ri: SimplifyShortImmForm(OutMI, X86::SBB16i16); break;
+ case X86::SBB32ri: SimplifyShortImmForm(OutMI, X86::SBB32i32); break;
+ case X86::SBB64ri32: SimplifyShortImmForm(OutMI, X86::SBB64i32); break;
+ case X86::SUB8ri: SimplifyShortImmForm(OutMI, X86::SUB8i8); break;
+ case X86::SUB16ri: SimplifyShortImmForm(OutMI, X86::SUB16i16); break;
+ case X86::SUB32ri: SimplifyShortImmForm(OutMI, X86::SUB32i32); break;
+ case X86::SUB64ri32: SimplifyShortImmForm(OutMI, X86::SUB64i32); break;
+ case X86::TEST8ri: SimplifyShortImmForm(OutMI, X86::TEST8i8); break;
+ case X86::TEST16ri: SimplifyShortImmForm(OutMI, X86::TEST16i16); break;
+ case X86::TEST32ri: SimplifyShortImmForm(OutMI, X86::TEST32i32); break;
+ case X86::TEST64ri32: SimplifyShortImmForm(OutMI, X86::TEST64i32); break;
+ case X86::XOR8ri: SimplifyShortImmForm(OutMI, X86::XOR8i8); break;
+ case X86::XOR16ri: SimplifyShortImmForm(OutMI, X86::XOR16i16); break;
+ case X86::XOR32ri: SimplifyShortImmForm(OutMI, X86::XOR32i32); break;
+ case X86::XOR64ri32: SimplifyShortImmForm(OutMI, X86::XOR64i32); break;
}
}
@@ -346,7 +479,7 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
// cast away const; DIetc do not take const operands for some reason.
DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
if (V.getContext().isSubprogram())
- O << DISubprogram(V.getContext().getNode()).getDisplayName() << ":";
+ O << DISubprogram(V.getContext()).getDisplayName() << ":";
O << V.getName();
O << " <- ";
// Frame address. Currently handles register +- offset only.
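
The motivation for SimplifyShortImmForm and SimplifyShortMoveForm is encoding size: x86 has accumulator-special forms that omit the ModRM byte, so for example addl $imm, %eax encodes as ADD32i32 (05 id, 5 bytes) rather than the generic ADD32ri (81 /0 id, 6 bytes). A reduced sketch of the immediate-form rewrite, omitting the register check shown above:

    #include "llvm/MC/MCInst.h"

    // Rewrite e.g. ADD32ri %eax, %eax, imm into the implicit-EAX form
    // ADD32i32 imm (ShortOpc), keeping only the immediate operand.
    static void toShortImmForm(llvm::MCInst &Inst, unsigned ShortOpc) {
      llvm::MCOperand Imm = Inst.getOperand(Inst.getNumOperands() - 1);
      Inst = llvm::MCInst();
      Inst.setOpcode(ShortOpc);
      Inst.addOperand(Imm);
    }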
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/AsmPrinter/X86MCInstLower.h
index ebd23f6..9e5474f 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.h
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.h
@@ -25,7 +25,7 @@ namespace llvm {
class X86Subtarget;
/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
-class VISIBILITY_HIDDEN X86MCInstLower {
+class LLVM_LIBRARY_VISIBILITY X86MCInstLower {
MCContext &Ctx;
Mangler *Mang;
X86AsmPrinter &AsmPrinter;
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index da6bb91..1334820 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -39,7 +39,12 @@ set(sources
if( CMAKE_CL_64 )
enable_language(ASM_MASM)
- set(sources ${sources} X86CompilationCallback_Win64.asm)
+ ADD_CUSTOM_COMMAND(
+ OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj
+ COMMAND ${CMAKE_ASM_MASM_COMPILER} /Fo ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj /c ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm
+ DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm
+ )
+ set(sources ${sources} ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj)
endif()
add_llvm_target(X86CodeGen ${sources})
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 62e7357..8a5a630 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -155,7 +155,57 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
///
/// @param mcInst - The MCInst to append to.
/// @param immediate - The immediate value to append.
-static void translateImmediate(MCInst &mcInst, uint64_t immediate) {
+/// @param operand - The operand, as stored in the descriptor table.
+/// @param insn - The internal instruction.
+static void translateImmediate(MCInst &mcInst,
+ uint64_t immediate,
+ OperandSpecifier &operand,
+ InternalInstruction &insn) {
+ // Sign-extend the immediate if necessary.
+
+ OperandType type = operand.type;
+
+ if (type == TYPE_RELv) {
+ switch (insn.displacementSize) {
+ default:
+ break;
+ case 8:
+ type = TYPE_MOFFS8;
+ break;
+ case 16:
+ type = TYPE_MOFFS16;
+ break;
+ case 32:
+ type = TYPE_MOFFS32;
+ break;
+ case 64:
+ type = TYPE_MOFFS64;
+ break;
+ }
+ }
+
+ switch (type) {
+ case TYPE_MOFFS8:
+ case TYPE_REL8:
+ if(immediate & 0x80)
+ immediate |= ~(0xffull);
+ break;
+ case TYPE_MOFFS16:
+ if(immediate & 0x8000)
+ immediate |= ~(0xffffull);
+ break;
+ case TYPE_MOFFS32:
+ case TYPE_REL32:
+ case TYPE_REL64:
+ if(immediate & 0x80000000)
+ immediate |= ~(0xffffffffull);
+ break;
+ case TYPE_MOFFS64:
+ default:
+ // operand is 64 bits wide. Do nothing.
+ break;
+ }
+
mcInst.addOperand(MCOperand::CreateImm(immediate));
}
@@ -370,8 +420,7 @@ static bool translateRM(MCInst &mcInst,
case TYPE_XMM64:
case TYPE_XMM128:
case TYPE_DEBUGREG:
- case TYPE_CR32:
- case TYPE_CR64:
+ case TYPE_CONTROLREG:
return translateRMRegister(mcInst, insn);
case TYPE_M:
case TYPE_M8:
@@ -447,8 +496,10 @@ static bool translateOperand(MCInst &mcInst,
case ENCODING_IO:
case ENCODING_Iv:
case ENCODING_Ia:
- translateImmediate(mcInst,
- insn.immediates[insn.numImmediatesTranslated++]);
+ translateImmediate(mcInst,
+ insn.immediates[insn.numImmediatesTranslated++],
+ operand,
+ insn);
return false;
case ENCODING_RB:
case ENCODING_RW:
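
translateImmediate() now canonicalizes narrow immediates to 64 bits, so a one-byte displacement of 0x80 is reported as 0xFFFFFFFFFFFFFF80 rather than 128. The switch above generalizes into one helper; a standalone sketch (signExtend is hypothetical):

    #include <cstdint>

    static uint64_t signExtend(uint64_t Imm, unsigned Bits) {
      uint64_t SignBit = 1ULL << (Bits - 1);
      if (Imm & SignBit)
        Imm |= ~((SignBit << 1) - 1);  // set every bit above the field
      return Imm;
    }
    // signExtend(0x80, 8) == 0xFFFFFFFFFFFFFF80, the TYPE_MOFFS8 case.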
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index 64f6b2d..6c3ff6b 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -1034,14 +1034,10 @@ static int readModRM(struct InternalInstruction* insn) {
if (index > 7) \
*valid = 0; \
return prefix##_DR0 + index; \
- case TYPE_CR32: \
- if (index > 7) \
- *valid = 0; \
- return prefix##_ECR0 + index; \
- case TYPE_CR64: \
+ case TYPE_CONTROLREG: \
if (index > 8) \
*valid = 0; \
- return prefix##_RCR0 + index; \
+ return prefix##_CR0 + index; \
} \
}
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 462cf68..28ba86b 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -225,26 +225,16 @@ extern "C" {
ENTRY(DR6) \
ENTRY(DR7)
-#define REGS_CONTROL_32BIT \
- ENTRY(ECR0) \
- ENTRY(ECR1) \
- ENTRY(ECR2) \
- ENTRY(ECR3) \
- ENTRY(ECR4) \
- ENTRY(ECR5) \
- ENTRY(ECR6) \
- ENTRY(ECR7)
-
-#define REGS_CONTROL_64BIT \
- ENTRY(RCR0) \
- ENTRY(RCR1) \
- ENTRY(RCR2) \
- ENTRY(RCR3) \
- ENTRY(RCR4) \
- ENTRY(RCR5) \
- ENTRY(RCR6) \
- ENTRY(RCR7) \
- ENTRY(RCR8)
+#define REGS_CONTROL \
+ ENTRY(CR0) \
+ ENTRY(CR1) \
+ ENTRY(CR2) \
+ ENTRY(CR3) \
+ ENTRY(CR4) \
+ ENTRY(CR5) \
+ ENTRY(CR6) \
+ ENTRY(CR7) \
+ ENTRY(CR8)
#define ALL_EA_BASES \
EA_BASES_16BIT \
@@ -264,8 +254,7 @@ extern "C" {
REGS_XMM \
REGS_SEGMENT \
REGS_DEBUG \
- REGS_CONTROL_32BIT \
- REGS_CONTROL_64BIT \
+ REGS_CONTROL \
ENTRY(RIP)
/*
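
These REGS_* lists are X-macros: chains of ENTRY(name) invocations that clients expand under their own definition of ENTRY, once to build an enum, again to build string tables, and so on. A reduced sketch of the pattern, with hypothetical names:

    #define CONTROL_REGS \
      ENTRY(CR0) ENTRY(CR1) ENTRY(CR2) ENTRY(CR3) ENTRY(CR4) \
      ENTRY(CR5) ENTRY(CR6) ENTRY(CR7) ENTRY(CR8)

    #define ENTRY(x) REG_##x,
    enum ControlReg { CONTROL_REGS };  // REG_CR0, REG_CR1, ..., REG_CR8
    #undef ENTRY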
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 4a7cd57..0f33f52 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -280,8 +280,7 @@ struct ContextDecision {
ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
- ENUM_ENTRY(TYPE_CR32, "4-byte control register operand") \
- ENUM_ENTRY(TYPE_CR64, "8-byte") \
+ ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \
\
ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \
ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \
diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp
index 5e80845..dab070e 100644
--- a/lib/Target/X86/SSEDomainFix.cpp
+++ b/lib/Target/X86/SSEDomainFix.cpp
@@ -155,9 +155,7 @@ char SSEDomainFixPass::ID = 0;
/// Translate TRI register number to an index into our smaller tables of
/// interesting registers. Return -1 for boring registers.
int SSEDomainFixPass::RegIndex(unsigned reg) {
- // Registers are sorted lexicographically.
- // We just need them to be consecutive, ordering doesn't matter.
- assert(X86::XMM9 == X86::XMM0+NumRegs-1 && "Unexpected sort");
+ assert(X86::XMM15 == X86::XMM0+NumRegs-1 && "Unexpected sort");
reg -= X86::XMM0;
return reg < NumRegs ? (int) reg : -1;
}
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index ba9c1d0..151087f 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -12,6 +12,7 @@
#include "X86FixupKinds.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
@@ -43,20 +44,19 @@ public:
X86AsmBackend(const Target &T)
: TargetAsmBackend(T) {}
- void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &DF,
+ void ApplyFixup(const MCFixup &Fixup, MCDataFragment &DF,
uint64_t Value) const {
- unsigned Size = 1 << getFixupKindLog2Size(Fixup.Kind);
+ unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
- assert(Fixup.Offset + Size <= DF.getContents().size() &&
+ assert(Fixup.getOffset() + Size <= DF.getContents().size() &&
"Invalid fixup offset!");
for (unsigned i = 0; i != Size; ++i)
- DF.getContents()[Fixup.Offset + i] = uint8_t(Value >> (i * 8));
+ DF.getContents()[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}
- bool MayNeedRelaxation(const MCInst &Inst,
- const SmallVectorImpl<MCAsmFixup> &Fixups) const;
+ bool MayNeedRelaxation(const MCInst &Inst) const;
- void RelaxInstruction(const MCInstFragment *IF, MCInst &Res) const;
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
};
@@ -75,6 +75,7 @@ static unsigned getRelaxedOpcode(unsigned Op) {
case X86::JG_1: return X86::JG_4;
case X86::JLE_1: return X86::JLE_4;
case X86::JL_1: return X86::JL_4;
+ case X86::TAILJMP_1:
case X86::JMP_1: return X86::JMP_4;
case X86::JNE_1: return X86::JNE_4;
case X86::JNO_1: return X86::JNO_4;
@@ -86,35 +87,33 @@ static unsigned getRelaxedOpcode(unsigned Op) {
}
}
-bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst,
- const SmallVectorImpl<MCAsmFixup> &Fixups) const {
- // Check for a 1byte pcrel fixup, and enforce that we would know how to relax
- // this instruction.
- for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
- if (unsigned(Fixups[i].Kind) == X86::reloc_pcrel_1byte) {
- assert(getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode());
- return true;
- }
- }
+bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+ // Check if this instruction is ever relaxable.
+ if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode())
+ return false;
- return false;
+ // If so, just assume it can be relaxed. Once we support relaxing more complex
+ // instructions we should check that the instruction actually has symbolic
+ // operands before doing this, but we need to be careful about things like
+ // PCrel.
+ return true;
}
// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
-void X86AsmBackend::RelaxInstruction(const MCInstFragment *IF,
- MCInst &Res) const {
+void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
// The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
- unsigned RelaxedOp = getRelaxedOpcode(IF->getInst().getOpcode());
+ unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
- if (RelaxedOp == IF->getInst().getOpcode()) {
+ if (RelaxedOp == Inst.getOpcode()) {
SmallString<256> Tmp;
raw_svector_ostream OS(Tmp);
- IF->getInst().dump_pretty(OS);
+ Inst.dump_pretty(OS);
+ OS << "\n";
report_fatal_error("unexpected instruction to relax: " + OS.str());
}
- Res = IF->getInst();
+ Res = Inst;
Res.setOpcode(RelaxedOp);
}
@@ -199,6 +198,18 @@ public:
}
};
+class ELFX86_32AsmBackend : public ELFX86AsmBackend {
+public:
+ ELFX86_32AsmBackend(const Target &T)
+ : ELFX86AsmBackend(T) {}
+};
+
+class ELFX86_64AsmBackend : public ELFX86AsmBackend {
+public:
+ ELFX86_64AsmBackend(const Target &T)
+ : ELFX86AsmBackend(T) {}
+};
+
class DarwinX86AsmBackend : public X86AsmBackend {
public:
DarwinX86AsmBackend(const Target &T)
@@ -210,7 +221,8 @@ public:
bool isVirtualSection(const MCSection &Section) const {
const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
return (SMO.getType() == MCSectionMachO::S_ZEROFILL ||
- SMO.getType() == MCSectionMachO::S_GB_ZEROFILL);
+ SMO.getType() == MCSectionMachO::S_GB_ZEROFILL ||
+ SMO.getType() == MCSectionMachO::S_THREAD_LOCAL_ZEROFILL);
}
};
@@ -247,6 +259,26 @@ public:
const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
return SMO.getType() == MCSectionMachO::S_CSTRING_LITERALS;
}
+
+ virtual bool isSectionAtomizable(const MCSection &Section) const {
+ const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
+ // Fixed sized data sections are uniqued, they cannot be diced into atoms.
+ switch (SMO.getType()) {
+ default:
+ return true;
+
+ case MCSectionMachO::S_4BYTE_LITERALS:
+ case MCSectionMachO::S_8BYTE_LITERALS:
+ case MCSectionMachO::S_16BYTE_LITERALS:
+ case MCSectionMachO::S_LITERAL_POINTERS:
+ case MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS:
+ case MCSectionMachO::S_LAZY_SYMBOL_POINTERS:
+ case MCSectionMachO::S_MOD_INIT_FUNC_POINTERS:
+ case MCSectionMachO::S_MOD_TERM_FUNC_POINTERS:
+ case MCSectionMachO::S_INTERPOSING:
+ return false;
+ }
+ }
};
}
@@ -257,7 +289,7 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
case Triple::Darwin:
return new DarwinX86_32AsmBackend(T);
default:
- return new ELFX86AsmBackend(T);
+ return new ELFX86_32AsmBackend(T);
}
}
@@ -267,6 +299,6 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
case Triple::Darwin:
return new DarwinX86_64AsmBackend(T);
default:
- return new ELFX86AsmBackend(T);
+ return new ELFX86_64AsmBackend(T);
}
}
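
Two mechanics stand out in this file. MayNeedRelaxation() now reduces to asking whether getRelaxedOpcode() maps the opcode to a distinct 4-byte-offset form, and ApplyFixup() resolves a fixup by patching 1 << Log2Size bytes of the value, little-endian, into the fragment data. The patch loop as a self-contained sketch (patchFixup is hypothetical):

    #include <cstdint>
    #include <vector>

    static void patchFixup(std::vector<uint8_t> &Data, unsigned Offset,
                           unsigned Log2Size, uint64_t Value) {
      unsigned Size = 1u << Log2Size;        // 1, 2, 4, or 8 bytes
      for (unsigned i = 0; i != Size; ++i)   // little-endian byte order
        Data[Offset + i] = uint8_t(Value >> (i * 8));
    }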
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index eece462..98ab2a6 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -15,7 +15,7 @@
#define X86COFF_MACHINEMODULEINFO_H
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/DenseSet.h"
#include "X86MachineFunctionInfo.h"
namespace llvm {
@@ -25,18 +25,18 @@ namespace llvm {
/// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation
/// for X86 COFF targets.
class X86COFFMachineModuleInfo : public MachineModuleInfoImpl {
- StringSet<> CygMingStubs;
+ DenseSet<MCSymbol const *> Externals;
public:
X86COFFMachineModuleInfo(const MachineModuleInfo &) {}
virtual ~X86COFFMachineModuleInfo();
- void addExternalFunction(StringRef Name) {
- CygMingStubs.insert(Name);
+ void addExternalFunction(MCSymbol* Symbol) {
+ Externals.insert(Symbol);
}
- typedef StringSet<>::const_iterator stub_iterator;
- stub_iterator stub_begin() const { return CygMingStubs.begin(); }
- stub_iterator stub_end() const { return CygMingStubs.end(); }
+ typedef DenseSet<MCSymbol const *>::const_iterator externals_iterator;
+ externals_iterator externals_begin() const { return Externals.begin(); }
+ externals_iterator externals_end() const { return Externals.end(); }
};
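
Replacing StringSet<> with DenseSet<const MCSymbol *> works because MCSymbols are uniqued by their owning MCContext, so pointer identity stands in for string comparison and no name hashing is needed. Usage, as a short sketch (Sym is assumed to be an interned const MCSymbol *):

    llvm::DenseSet<const llvm::MCSymbol *> Externals;
    Externals.insert(Sym);                  // hashes the pointer, not the name
    bool Seen = Externals.count(Sym) != 0;  // O(1) membership test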
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index fd15efd..a5774e1 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -307,6 +307,20 @@ def CC_X86_32_FastCall : CallingConv<[
CCDelegateTo<CC_X86_32_Common>
]>;
+def CC_X86_32_ThisCall : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in EAX.
+ CCIfNest<CCAssignToReg<[EAX]>>,
+
+ // The first integer argument is passed in ECX
+ CCIfType<[i32], CCAssignToReg<[ECX]>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
def CC_X86_32_FastCC : CallingConv<[
// Handles byval parameters. Note that we can't rely on the delegation
// to CC_X86_32_Common for this because that happens after code that
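
CC_X86_32_ThisCall models the MSVC thiscall convention used for C++ member functions on 32-bit Windows: the first integer argument, normally the implicit this pointer, travels in ECX, the remaining arguments go on the stack, and the callee cleans them up. In source terms, a sketch:

    struct Widget {
      // Under 32-bit MSVC thiscall:
      //   this -> ECX; a and b -> pushed on the stack; callee pops them.
      int scale(int a, int b);
    };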
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index ff9208c..1bc5eb7 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -180,6 +180,8 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
if (CC == CallingConv::X86_FastCall)
return CC_X86_32_FastCall;
+ else if (CC == CallingConv::X86_ThisCall)
+ return CC_X86_32_ThisCall;
else if (CC == CallingConv::Fast)
return CC_X86_32_FastCC;
else if (CC == CallingConv::GHC)
@@ -324,7 +326,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
unsigned Src, EVT SrcVT,
unsigned &ResultReg) {
- unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
+ unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+ Src, /*TODO: Kill=*/false);
if (RR != 0) {
ResultReg = RR;
@@ -416,7 +419,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
(S == 1 || S == 2 || S == 4 || S == 8)) {
// Scaled-index addressing.
Scale = S;
- IndexReg = getRegForGEPIndex(Op);
+ IndexReg = getRegForGEPIndex(Op).first;
if (IndexReg == 0)
return false;
} else
@@ -802,7 +805,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
unsigned ResultReg = getRegForValue(I->getOperand(0));
if (ResultReg == 0) return false;
// Set the high bits to zero.
- ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg);
+ ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
if (ResultReg == 0) return false;
UpdateValueMap(I, ResultReg);
return true;
@@ -913,7 +916,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) {
const MachineInstr &MI = *RI;
- if (MI.modifiesRegister(Reg)) {
+ if (MI.definesRegister(Reg)) {
unsigned Src, Dst, SrcSR, DstSR;
if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
@@ -1019,14 +1022,14 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
unsigned Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0) return false;
- TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC);
+ TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC, DL);
// The shift instruction uses X86::CL. If we defined a super-register
// of X86::CL, emit an EXTRACT_SUBREG to precisely describe what
// we're doing here.
if (CReg != X86::CL)
BuildMI(MBB, DL, TII.get(TargetOpcode::EXTRACT_SUBREG), X86::CL)
- .addReg(CReg).addImm(X86::SUBREG_8BIT);
+ .addReg(CReg).addImm(X86::sub_8bit);
unsigned ResultReg = createResultReg(RC);
BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg);
@@ -1133,7 +1136,8 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
// Then issue an extract_subreg.
unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
- CopyReg, X86::SUBREG_8BIT);
+ CopyReg, /*Kill=*/true,
+ X86::sub_8bit);
if (!ResultReg)
return false;
@@ -1436,7 +1440,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
}
case CCValAssign::BCvt: {
unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(),
- ISD::BIT_CONVERT, Arg);
+ ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false);
assert(BC != 0 && "Failed to emit a bitcast!");
Arg = BC;
ArgVT = VA.getLocVT();
@@ -1447,7 +1451,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (VA.isRegLoc()) {
TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT);
bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(),
- Arg, RC, RC);
+ Arg, RC, RC, DL);
assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
Emitted = true;
RegArgs.push_back(VA.getLocReg());
@@ -1473,7 +1477,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (Subtarget->isPICStyleGOT()) {
TargetRegisterClass *RC = X86::GR32RegisterClass;
unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF);
- bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC);
+ bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC,
+ DL);
assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
Emitted = true;
}
@@ -1552,7 +1557,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
unsigned ResultReg = createResultReg(DstRC);
bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- RVLocs[0].getLocReg(), DstRC, SrcRC);
+ RVLocs[0].getLocReg(), DstRC, SrcRC, DL);
assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
Emitted = true;
if (CopyVT != RVLocs[0].getValVT()) {
diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp
index 541083f..747683d 100644
--- a/lib/Target/X86/X86FloatingPointRegKill.cpp
+++ b/lib/Target/X86/X86FloatingPointRegKill.cpp
@@ -14,7 +14,6 @@
#define DEBUG_TYPE "x86-codegen"
#include "X86.h"
#include "X86InstrInfo.h"
-#include "X86Subtarget.h"
#include "llvm/Instructions.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -42,12 +41,70 @@ namespace {
virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const { return "X86 FP_REG_KILL inserter"; }
+ virtual const char *getPassName() const {
+ return "X86 FP_REG_KILL inserter";
+ }
};
char FPRegKiller::ID = 0;
}
-FunctionPass *llvm::createX87FPRegKillInserterPass() { return new FPRegKiller(); }
+FunctionPass *llvm::createX87FPRegKillInserterPass() {
+ return new FPRegKiller();
+}
+
+/// isFPStackVReg - Return true if the specified vreg is from a fp stack
+/// register class.
+static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(RegNo))
+ return false;
+
+ switch (MRI.getRegClass(RegNo)->getID()) {
+ default: return false;
+ case X86::RFP32RegClassID:
+ case X86::RFP64RegClassID:
+ case X86::RFP80RegClassID:
+ return true;
+ }
+}
+
+
+/// ContainsFPStackCode - Return true if the specific MBB has floating point
+/// stack code, and thus needs an FP_REG_KILL.
+static bool ContainsFPStackCode(MachineBasicBlock *MBB,
+ const MachineRegisterInfo &MRI) {
+ // Scan the block, looking for instructions that define fp stack vregs.
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (I->getNumOperands() == 0 || !I->getOperand(0).isReg())
+ continue;
+
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+ if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef())
+ continue;
+
+ if (isFPStackVReg(I->getOperand(op).getReg(), MRI))
+ return true;
+ }
+ }
+
+ // Check PHI nodes in successor blocks. These PHI's will be lowered to have
+ // a copy of the input value in this block, which is a definition of the
+ // value.
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ E = MBB->succ_end(); SI != E; ++ SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ for (MachineBasicBlock::iterator I = SuccBB->begin(), E = SuccBB->end();
+ I != E; ++I) {
+ // All PHI nodes are at the top of the block.
+ if (!I->isPHI()) break;
+
+ if (isFPStackVReg(I->getOperand(0).getReg(), MRI))
+ return true;
+ }
+ }
+
+ return false;
+}
bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
// If we are emitting FP stack code, scan the basic block to determine if this
@@ -65,14 +122,13 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
// Fast-path: If nothing is using the x87 registers, we don't need to do
// any scanning.
- MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() &&
MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() &&
MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty())
return false;
bool Changed = false;
- const X86Subtarget &Subtarget = MF.getTarget().getSubtarget<X86Subtarget>();
MachineFunction::iterator MBBI = MF.begin();
MachineFunction::iterator EndMBB = MF.end();
for (; MBBI != EndMBB; ++MBBI) {
@@ -87,48 +143,8 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- bool ContainsFPCode = false;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- !ContainsFPCode && I != E; ++I) {
- if (I->getNumOperands() != 0 && I->getOperand(0).isReg()) {
- const TargetRegisterClass *clas;
- for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
- if (I->getOperand(op).isReg() && I->getOperand(op).isDef() &&
- TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
- ((clas = MRI.getRegClass(I->getOperand(op).getReg())) ==
- X86::RFP32RegisterClass ||
- clas == X86::RFP64RegisterClass ||
- clas == X86::RFP80RegisterClass)) {
- ContainsFPCode = true;
- break;
- }
- }
- }
- }
- // Check PHI nodes in successor blocks. These PHI's will be lowered to have
- // a copy of the input value in this block. In SSE mode, we only care about
- // 80-bit values.
- if (!ContainsFPCode) {
- // Final check, check LLVM BB's that are successors to the LLVM BB
- // corresponding to BB for FP PHI nodes.
- const BasicBlock *LLVMBB = MBB->getBasicBlock();
- const PHINode *PN;
- for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
- !ContainsFPCode && SI != E; ++SI) {
- for (BasicBlock::const_iterator II = SI->begin();
- (PN = dyn_cast<PHINode>(II)); ++II) {
- if (PN->getType()==Type::getX86_FP80Ty(LLVMBB->getContext()) ||
- (!Subtarget.hasSSE1() && PN->getType()->isFloatingPointTy()) ||
- (!Subtarget.hasSSE2() &&
- PN->getType()==Type::getDoubleTy(LLVMBB->getContext()))) {
- ContainsFPCode = true;
- break;
- }
- }
- }
- }
- // Finally, if we found any FP code, emit the FP_REG_KILL instruction.
- if (ContainsFPCode) {
+ // If we find any FP stack code, emit the FP_REG_KILL instruction.
+ if (ContainsFPStackCode(MBB, MRI)) {
BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc(),
MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL));
++NumFPKill;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index fd8bb1e..0f64383 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1693,7 +1693,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
Result,
CurDAG->getTargetConstant(8, MVT::i8)), 0);
// Then truncate it down to i8.
- Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
+ Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
MVT::i8, Result);
} else {
Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@@ -1834,7 +1834,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
CurDAG->getTargetConstant(8, MVT::i8)),
0);
// Then truncate it down to i8.
- Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
+ Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
MVT::i8, Result);
} else {
Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@@ -1883,7 +1883,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
// Extract the l-register.
- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
MVT::i8, Reg);
// Emit a testb.
@@ -1912,7 +1912,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
Reg.getValueType(), Reg, RC), 0);
// Extract the h-register.
- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT_HI, dl,
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
MVT::i8, Reg);
// Emit a testb. No special NOREX tricks are needed since there's
@@ -1930,7 +1930,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Reg = N0.getNode()->getOperand(0);
// Extract the 16-bit subregister.
- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_16BIT, dl,
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
MVT::i16, Reg);
// Emit a testw.
@@ -1946,7 +1946,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Reg = N0.getNode()->getOperand(0);
// Extract the 32-bit subregister.
- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_32BIT, dl,
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
MVT::i32, Reg);
// Emit a testl.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6ce9ab7..b02c33d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -94,7 +94,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// X86 is weird, it always uses i8 for shift amounts and setcc results.
setShiftAmountType(MVT::i8);
setBooleanContents(ZeroOrOneBooleanContent);
- setSchedulingPreference(SchedulingForRegPressure);
+ setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(X86StackPtr);
if (Subtarget->isTargetDarwin()) {
@@ -145,13 +145,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
} else if (!UseSoftFloat) {
- if (X86ScalarSSEf64) {
- // We have an impenetrably clever algorithm for ui64->double only.
- setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
- }
+ // We have an algorithm for SSE2->double, and we turn this into a
+ // 64-bit FILD followed by conditional FADD for other targets.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
// We have an algorithm for SSE2, and we turn this into a 64-bit
// FILD for other targets.
- setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
}
// Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
@@ -215,9 +214,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
- if (!X86ScalarSSEf64) {
+ if (!X86ScalarSSEf64) {
setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::BIT_CONVERT , MVT::f64 , Expand);
+ // Without SSE, i64->f64 goes through memory; i64->MMX is Legal.
+ if (Subtarget->hasMMX() && !DisableMMX)
+ setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Custom);
+ else
+ setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand);
+ }
}
// Scalar integer divide and remainder are lowered to use operations that
@@ -679,6 +686,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSETCC, MVT::v8i8, Custom);
setOperationAction(ISD::VSETCC, MVT::v4i16, Custom);
setOperationAction(ISD::VSETCC, MVT::v2i32, Custom);
+
+ if (!X86ScalarSSEf64 && Subtarget->is64Bit()) {
+ setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom);
+ setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom);
+ setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom);
+ setOperationAction(ISD::BIT_CONVERT, MVT::v2f32, Custom);
+ setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom);
+ }
}
if (!UseSoftFloat && Subtarget->hasSSE1()) {
@@ -1244,10 +1259,8 @@ X86TargetLowering::LowerReturn(SDValue Chain,
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
- if (!Reg) {
- Reg = MRI.createVirtualRegister(getRegClassFor(MVT::i64));
- FuncInfo->setSRetReturnReg(Reg);
- }
+ assert(Reg &&
+ "SRetReturnReg should have been set in LowerFormalArguments().");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
@@ -1384,6 +1397,8 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg,
return !Subtarget->is64Bit();
case CallingConv::X86_FastCall:
return !Subtarget->is64Bit();
+ case CallingConv::X86_ThisCall:
+ return !Subtarget->is64Bit();
case CallingConv::Fast:
return GuaranteedTailCallOpt;
case CallingConv::GHC:
@@ -1405,6 +1420,8 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
if (CC == CallingConv::X86_FastCall)
return CC_X86_32_FastCall;
+ else if (CC == CallingConv::X86_ThisCall)
+ return CC_X86_32_ThisCall;
else if (CC == CallingConv::Fast)
return CC_X86_32_FastCC;
else if (CC == CallingConv::GHC)
@@ -1596,7 +1613,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- if (Is64Bit || CallConv != CallingConv::X86_FastCall) {
+ if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
+ CallConv != CallingConv::X86_ThisCall)) {
FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,
true, false));
}
@@ -1716,7 +1734,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (!Is64Bit) {
// RegSaveFrameIndex is X86-64 only.
FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
- if (CallConv == CallingConv::X86_FastCall)
+ if (CallConv == CallingConv::X86_FastCall ||
+ CallConv == CallingConv::X86_ThisCall)
// fastcc functions can't have varargs.
FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
}
@@ -5272,7 +5291,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
}
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
- MFI->setHasCalls(true);
+ MFI->setAdjustsStack(true);
SDValue Flag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
@@ -5462,7 +5481,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
}
SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
- SDValue StackSlot,
+ SDValue StackSlot,
SelectionDAG &DAG) const {
// Build the FILD
DebugLoc dl = Op.getDebugLoc();
@@ -5636,35 +5655,72 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue N0 = Op.getOperand(0);
DebugLoc dl = Op.getDebugLoc();
- // Now not UINT_TO_FP is legal (it's marked custom), dag combiner won't
+  // Since UINT_TO_FP is marked Custom (and thus treated as legal), the DAG combiner won't
// optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
// the optimization here.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
EVT SrcVT = N0.getValueType();
- if (SrcVT == MVT::i64) {
- // We only handle SSE2 f64 target here; caller can expand the rest.
- if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64)
- return SDValue();
-
+ EVT DstVT = Op.getValueType();
+ if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i64(Op, DAG);
- } else if (SrcVT == MVT::i32 && X86ScalarSSEf64) {
+ else if (SrcVT == MVT::i32 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i32(Op, DAG);
- }
-
- assert(SrcVT == MVT::i32 && "Unknown UINT_TO_FP to lower!");
// Make a 64-bit buffer, and use it to build an FILD.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
- SDValue WordOff = DAG.getConstant(4, getPointerTy());
- SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
- getPointerTy(), StackSlot, WordOff);
- SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ if (SrcVT == MVT::i32) {
+ SDValue WordOff = DAG.getConstant(4, getPointerTy());
+ SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
+ getPointerTy(), StackSlot, WordOff);
+ SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ StackSlot, NULL, 0, false, false, 0);
+ SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
+ OffsetSlot, NULL, 0, false, false, 0);
+ SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+ return Fild;
+ }
+
+ assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot, NULL, 0, false, false, 0);
- SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
- OffsetSlot, NULL, 0, false, false, 0);
- return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+ // For i64 source, we need to add the appropriate power of 2 if the input
+ // was negative. This is the same as the optimization in
+  // DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP, and for it to be safe here,
+ // we must be careful to do the computation in x87 extended precision, not
+ // in SSE. (The generic code can't know it's OK to do this, or how to.)
+ SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
+ SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
+ SDValue Fild = DAG.getNode(X86ISD::FILD, dl, Tys, Ops, 3);
+
+ APInt FF(32, 0x5F800000ULL);
+
+ // Check whether the sign bit is set.
+ SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(MVT::i64),
+ Op.getOperand(0), DAG.getConstant(0, MVT::i64),
+ ISD::SETLT);
+
+  // Build a 64-bit pair (0, FF) in the constant pool, with FF in the low bits.
+ SDValue FudgePtr = DAG.getConstantPool(
+ ConstantInt::get(*DAG.getContext(), FF.zext(64)),
+ getPointerTy());
+
+ // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue Four = DAG.getIntPtrConstant(4);
+ SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
+ Zero, Four);
+ FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset);
+
+ // Load the value out, extending it from f32 to f80.
+ // FIXME: Avoid the extend by constructing the right constant pool?
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
+ FudgePtr, PseudoSourceValue::getConstantPool(),
+ 0, MVT::f32, false, false, 4);
+ // Extend everything to 80 bits to force it to be done on x87.
+ SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
+ return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
}
std::pair<SDValue,SDValue> X86TargetLowering::
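The magic constant here, 0x5F800000, is the IEEE-754 single-precision
encoding of 2^64: interpreting the unsigned input as signed makes FILD see
X - 2^64 whenever the sign bit is set, and adding the fudge factor in
extended precision restores the true value with only one final rounding.
A minimal scalar sketch, assuming two's complement and an 80-bit long
double (UintToDouble is an illustrative name, not LLVM API):

#include <cstdint>

double UintToDouble(uint64_t X) {
  // Signed conversion, like the FILD of the stored 64-bit value; this sees
  // X - 2^64 whenever the sign bit of X is set.
  long double R = (long double)(int64_t)X;
  if ((int64_t)X < 0)
    R += 18446744073709551616.0L;  // add 2^64, the 0x5F800000 fudge
  return (double)R;                // one rounding, like the FP_ROUND above
}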
@@ -6593,221 +6649,6 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getMergeValues(Ops1, 2, dl);
}
-SDValue
-X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile,
- const Value *DstSV,
- uint64_t DstSVOff) const {
- ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
-
- // If not DWORD aligned or size is more than the threshold, call the library.
- // The libc version is likely to be faster for these cases. It can use the
- // address value and run time information about the CPU.
- if ((Align & 3) != 0 ||
- !ConstantSize ||
- ConstantSize->getZExtValue() >
- getSubtarget()->getMaxInlineSizeThreshold()) {
- SDValue InFlag(0, 0);
-
- // Check to see if there is a specialized entry-point for memory zeroing.
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
-
- if (const char *bzeroEntry = V &&
- V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
- EVT IntPtr = getPointerTy();
- const Type *IntPtrTy = TD->getIntPtrType(*DAG.getContext());
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Node = Dst;
- Entry.Ty = IntPtrTy;
- Args.push_back(Entry);
- Entry.Node = Size;
- Args.push_back(Entry);
- std::pair<SDValue,SDValue> CallResult =
- LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
- false, false, false, false,
- 0, CallingConv::C, false, /*isReturnValueUsed=*/false,
- DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
- return CallResult.second;
- }
-
- // Otherwise have the target-independent code call memset.
- return SDValue();
- }
-
- uint64_t SizeVal = ConstantSize->getZExtValue();
- SDValue InFlag(0, 0);
- EVT AVT;
- SDValue Count;
- ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
- unsigned BytesLeft = 0;
- bool TwoRepStos = false;
- if (ValC) {
- unsigned ValReg;
- uint64_t Val = ValC->getZExtValue() & 255;
-
- // If the value is a constant, then we can potentially use larger sets.
- switch (Align & 3) {
- case 2: // WORD aligned
- AVT = MVT::i16;
- ValReg = X86::AX;
- Val = (Val << 8) | Val;
- break;
- case 0: // DWORD aligned
- AVT = MVT::i32;
- ValReg = X86::EAX;
- Val = (Val << 8) | Val;
- Val = (Val << 16) | Val;
- if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
- AVT = MVT::i64;
- ValReg = X86::RAX;
- Val = (Val << 32) | Val;
- }
- break;
- default: // Byte aligned
- AVT = MVT::i8;
- ValReg = X86::AL;
- Count = DAG.getIntPtrConstant(SizeVal);
- break;
- }
-
- if (AVT.bitsGT(MVT::i8)) {
- unsigned UBytes = AVT.getSizeInBits() / 8;
- Count = DAG.getIntPtrConstant(SizeVal / UBytes);
- BytesLeft = SizeVal % UBytes;
- }
-
- Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
- InFlag);
- InFlag = Chain.getValue(1);
- } else {
- AVT = MVT::i8;
- Count = DAG.getIntPtrConstant(SizeVal);
- Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
- X86::ECX,
- Count, InFlag);
- InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
- X86::EDI,
- Dst, InFlag);
- InFlag = Chain.getValue(1);
-
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
- Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
-
- if (TwoRepStos) {
- InFlag = Chain.getValue(1);
- Count = Size;
- EVT CVT = Count.getValueType();
- SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
- DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
- Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
- X86::ECX,
- Left, InFlag);
- InFlag = Chain.getValue(1);
- Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
- Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
- } else if (BytesLeft) {
- // Handle the last 1 - 7 bytes.
- unsigned Offset = SizeVal - BytesLeft;
- EVT AddrVT = Dst.getValueType();
- EVT SizeVT = Size.getValueType();
-
- Chain = DAG.getMemset(Chain, dl,
- DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
- DAG.getConstant(Offset, AddrVT)),
- Src,
- DAG.getConstant(BytesLeft, SizeVT),
- Align, isVolatile, DstSV, DstSVOff + Offset);
- }
-
- // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
- return Chain;
-}
-
-SDValue
-X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile, bool AlwaysInline,
- const Value *DstSV,
- uint64_t DstSVOff,
- const Value *SrcSV,
- uint64_t SrcSVOff) const {
-  // This requires the copy size to be a constant, preferably
- // within a subtarget-specific limit.
- ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
- if (!ConstantSize)
- return SDValue();
- uint64_t SizeVal = ConstantSize->getZExtValue();
- if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
- return SDValue();
-
- /// If not DWORD aligned, call the library.
- if ((Align & 3) != 0)
- return SDValue();
-
- // DWORD aligned
- EVT AVT = MVT::i32;
- if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned
- AVT = MVT::i64;
-
- unsigned UBytes = AVT.getSizeInBits() / 8;
- unsigned CountVal = SizeVal / UBytes;
- SDValue Count = DAG.getIntPtrConstant(CountVal);
- unsigned BytesLeft = SizeVal % UBytes;
-
- SDValue InFlag(0, 0);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
- X86::ECX,
- Count, InFlag);
- InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
- X86::EDI,
- Dst, InFlag);
- InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
- X86::ESI,
- Src, InFlag);
- InFlag = Chain.getValue(1);
-
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
- SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
- array_lengthof(Ops));
-
- SmallVector<SDValue, 4> Results;
- Results.push_back(RepMovs);
- if (BytesLeft) {
- // Handle the last 1 - 7 bytes.
- unsigned Offset = SizeVal - BytesLeft;
- EVT DstVT = Dst.getValueType();
- EVT SrcVT = Src.getValueType();
- EVT SizeVT = Size.getValueType();
- Results.push_back(DAG.getMemcpy(Chain, dl,
- DAG.getNode(ISD::ADD, dl, DstVT, Dst,
- DAG.getConstant(Offset, DstVT)),
- DAG.getNode(ISD::ADD, dl, SrcVT, Src,
- DAG.getConstant(Offset, SrcVT)),
- DAG.getConstant(BytesLeft, SizeVT),
- Align, isVolatile, AlwaysInline,
- DstSV, DstSVOff + Offset,
- SrcSV, SrcSVOff + Offset));
- }
-
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &Results[0], Results.size());
-}
-
SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
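For orientation before the next hunk: the expansion removed above replicated
a constant byte across the widest aligned store unit and let rep stos do the
bulk of the work, with a narrow trailing memset for the remainder. A hedged
sketch of that arithmetic (ReplicateByteQword is an illustrative name):

#include <cstdint>

uint64_t ReplicateByteQword(uint64_t Val /* 0..255 */) {
  Val = (Val << 8)  | Val;   // 0xAB       -> 0xABAB
  Val = (Val << 16) | Val;   // 0xABAB     -> 0xABABABAB
  Val = (Val << 32) | Val;   // 0xABABABAB -> 0xABABABABABABABAB
  return Val;
}
// e.g. SizeVal = 23, QWORD aligned: Count = 23 / 8 = 2 rep stosq iterations
// (16 bytes), and BytesLeft = 23 % 8 = 7 bytes go to the trailing narrow
// memset, exactly as in the BytesLeft branch of the removed code.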
@@ -7138,6 +6979,9 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
DebugLoc dl = Op.getDebugLoc();
@@ -7161,6 +7005,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
+
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -7298,6 +7143,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
break;
}
case CallingConv::X86_FastCall:
+ case CallingConv::X86_ThisCall:
case CallingConv::Fast:
// Pass 'nest' parameter in EAX.
// Must be kept in sync with X86CallingConv.td
@@ -7630,6 +7476,27 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
return DAG.getMergeValues(Ops, 2, dl);
}
+SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT SrcVT = Op.getOperand(0).getValueType();
+ EVT DstVT = Op.getValueType();
+ assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
+ Subtarget->hasMMX() && !DisableMMX) &&
+ "Unexpected custom BIT_CONVERT");
+ assert((DstVT == MVT::i64 ||
+ (DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
+ "Unexpected custom BIT_CONVERT");
+ // i64 <=> MMX conversions are Legal.
+ if (SrcVT==MVT::i64 && DstVT.isVector())
+ return Op;
+ if (DstVT==MVT::i64 && SrcVT.isVector())
+ return Op;
+ // MMX <=> MMX conversions are Legal.
+ if (SrcVT.isVector() && DstVT.isVector())
+ return Op;
+ // All other conversions need to be expanded.
+ return SDValue();
+}
SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
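To summarize the new hook above: in the MMX-without-SSE2 64-bit configuration
it keeps i64<->vector and vector<->vector casts as-is and returns an empty
SDValue for everything else, routing those through generic expansion. A
hedged sketch of the predicate (KeptLegal is an illustrative name):

// Given the asserts, DstVT is i64 or a 64-bit vector type.
bool KeptLegal(bool SrcIsI64, bool SrcIsVec64, bool DstIsI64, bool DstIsVec64) {
  return (SrcIsI64   && DstIsVec64) ||  // i64 -> MMX: Legal
         (SrcIsVec64 && DstIsI64)   ||  // MMX -> i64: Legal
         (SrcIsVec64 && DstIsVec64);    // MMX -> MMX: Legal
}                                       // anything else: expand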
@@ -7699,6 +7566,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SMULO:
case ISD::UMULO: return LowerXALUO(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
+ case ISD::BIT_CONVERT: return LowerBIT_CONVERT(Op, DAG);
}
}
@@ -8203,9 +8071,15 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
MachineOperand& dest1Oper = bInstr->getOperand(0);
MachineOperand& dest2Oper = bInstr->getOperand(1);
MachineOperand* argOpers[2 + X86AddrNumOperands];
- for (int i=0; i < 2 + X86AddrNumOperands; ++i)
+ for (int i=0; i < 2 + X86AddrNumOperands; ++i) {
argOpers[i] = &bInstr->getOperand(i+2);
+ // We use some of the operands multiple times, so conservatively just
+ // clear any kill flags that might be present.
+ if (argOpers[i]->isReg() && argOpers[i]->isUse())
+ argOpers[i]->setIsKill(false);
+ }
+
// x86 address has 5 operands: base, index, scale, displacement, and segment.
int lastAddrIndx = X86AddrNumOperands - 1; // [0,3]
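A hedged sketch of the hazard the kill-flag clearing above avoids (the
machine-IR below is illustrative pseudo-syntax, not real MIR):

// If the same address operand is emitted into both halves of the 64-bit
// atomic expansion with its original kill flag intact:
//   low half:   OP ... killed %addr   <- flag marks the register dead here
//   high half:  OP ... %addr          <- second use of a "dead" register
// the verifier rejects the second use, so IsKill is cleared before the
// operands are duplicated.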
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 440601f9..1ef1a7b 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -563,7 +563,7 @@ namespace llvm {
return !X86ScalarSSEf64 || VT == MVT::f80;
}
- virtual const X86Subtarget* getSubtarget() const {
+ const X86Subtarget* getSubtarget() const {
return Subtarget;
}
@@ -677,6 +677,7 @@ namespace llvm {
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
SelectionDAG &DAG) const;
+ SDValue LowerBIT_CONVERT(SDValue op, SelectionDAG &DAG) const;
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
@@ -743,23 +744,6 @@ namespace llvm {
void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned NewOp) const;
- SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile,
- const Value *DstSV,
- uint64_t DstSVOff) const;
- SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile, bool AlwaysInline,
- const Value *DstSV,
- uint64_t DstSVOff,
- const Value *SrcSV,
- uint64_t SrcSVOff) const;
-
/// Utility function to emit string processing sse4.2 instructions
/// that return in xmm0.
/// This takes the instruction to expand, the associated machine basic
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index f5c3dbf..97eb17c 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -18,7 +18,9 @@
//
// 64-bits but only 32 bits are significant.
-def i64i32imm : Operand<i64>;
+def i64i32imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i32AsmOperand;
+}
// 64-bits but only 32 bits are significant, and those bits are treated as being
// pc relative.
@@ -30,7 +32,7 @@ def i64i32imm_pcrel : Operand<i64> {
// 64-bits but only 8 bits are significant.
def i64i8imm : Operand<i64> {
- let ParserMatchClass = ImmSExt8AsmOperand;
+ let ParserMatchClass = ImmSExti64i8AsmOperand;
}
// Special i64mem for addresses of load folding tail calls. These are not
@@ -198,6 +200,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64_TC:$dst, i32imm:$offset,
variable_ops),
"#TC_RETURN $dst $offset", []>;
+ let mayLoad = 1 in
def TCRETURNmi64 : I<0, Pseudo, (outs),
(ins i64mem_TC:$dst, i32imm:$offset, variable_ops),
"#TC_RETURN $dst $offset", []>;
@@ -208,6 +211,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops),
"jmp{q}\t{*}$dst # TAILCALL", []>;
+ let mayLoad = 1 in
def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
"jmp{q}\t{*}$dst # TAILCALL", []>;
}
@@ -241,6 +245,7 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
+let mayLoad = 1 in
def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
@@ -267,14 +272,16 @@ def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm),
"push{q}\t$imm", []>;
def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
"push{q}\t$imm", []>;
-def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
+def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
"push{q}\t$imm", []>;
}
-let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1 in
-def POPFQ : I<0x9D, RawFrm, (outs), (ins), "popf{q}", []>, REX_W;
-let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1 in
-def PUSHFQ64 : I<0x9C, RawFrm, (outs), (ins), "pushf{q}", []>;
+let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
+def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>,
+ Requires<[In64BitMode]>;
+let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
+def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>,
+ Requires<[In64BitMode]>;
def LEA64_32r : I<0x8D, MRMSrcMem,
(outs GR32:$dst), (ins lea64_32mem:$src),
@@ -309,16 +316,22 @@ def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
} // Defs = [EFLAGS]
// Repeat string ops
-let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI] in
+let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
[(X86rep_movs i64)]>, REP;
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI] in
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
[(X86rep_stos i64)]>, REP;
-def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scas{q}", []>;
+let Defs = [RDI,RSI], Uses = [RDI,RSI,EFLAGS] in
+def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>;
+
+let Defs = [RDI], Uses = [RAX,RDI,EFLAGS] in
+def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>;
-def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmps{q}", []>;
+def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>;
+
+def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
// Fast system-call instructions
def SYSEXIT64 : RI<0x35, RawFrm,
@@ -341,8 +354,17 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
[(set GR64:$dst, i64immSExt32:$src)]>;
}
+// The assembler accepts movq of a 64-bit immediate as an alternate spelling of
+// movabsq.
+let isAsmParserOnly = 1 in {
+def MOV64ri_alt : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+}
+
+let isCodeGenOnly = 1 in {
def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
+}
let canFoldAsLoad = 1, isReMaterializable = 1 in
def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
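A hedged usage note for MOV64ri_alt above (AT&T syntax, shown in comments):

// Both spellings should now be accepted by the assembler:
//   movabsq $0x123456789abcdef0, %rax   // canonical form, MOV64ri
//   movq    $0x123456789abcdef0, %rax   // alternate form, MOV64ri_alt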
@@ -398,9 +420,9 @@ def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
// Moves to and from control registers
-def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG_64:$src),
+def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_64:$dst), (ins GR64:$src),
+def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
// Sign/Zero extenders
@@ -478,7 +500,7 @@ def def32 : PatLeaf<(i32 GR32:$src), [{
// In the case of a 32-bit def that is known to implicitly zero-extend,
// we can use a SUBREG_TO_REG.
def : Pat<(i64 (zext def32:$src)),
- (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>;
+ (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
let neverHasSideEffects = 1 in {
let Defs = [RAX], Uses = [EAX] in
@@ -496,7 +518,7 @@ let neverHasSideEffects = 1 in {
let Defs = [EFLAGS] in {
-def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i32imm:$src),
+def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i64i32imm:$src),
"add{q}\t{$src, %rax|%rax, $src}", []>;
let isTwoAddress = 1 in {
@@ -555,7 +577,7 @@ def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2),
let Uses = [EFLAGS] in {
-def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i32imm:$src),
+def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i64i32imm:$src),
"adc{q}\t{$src, %rax|%rax, $src}", []>;
let isTwoAddress = 1 in {
@@ -565,9 +587,11 @@ def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst),
"adc{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
def ADC64rr_REV : RI<0x13, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"adc{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
def ADC64rm : RI<0x13, MRMSrcMem , (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
@@ -605,9 +629,11 @@ def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst),
[(set GR64:$dst, EFLAGS,
(X86sub_flag GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"sub{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
// Register-Memory Subtraction
def SUB64rm : RI<0x2B, MRMSrcMem, (outs GR64:$dst),
@@ -629,7 +655,7 @@ def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst),
(X86sub_flag GR64:$src1, i64immSExt32:$src2))]>;
} // isTwoAddress
-def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i32imm:$src),
+def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i64i32imm:$src),
"sub{q}\t{$src, %rax|%rax, $src}", []>;
// Memory-Register Subtraction
@@ -657,9 +683,11 @@ def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst),
"sbb{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
def SBB64rr_REV : RI<0x1B, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"sbb{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
def SBB64rm : RI<0x1B, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
@@ -676,7 +704,7 @@ def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst),
[(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>;
} // isTwoAddress
-def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i32imm:$src),
+def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i64i32imm:$src),
"sbb{q}\t{$src, %rax|%rax, $src}", []>;
def SBB64mr : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
@@ -1076,7 +1104,7 @@ def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
[(store (not (loadi64 addr:$dst)), addr:$dst)]>;
let Defs = [EFLAGS] in {
-def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i32imm:$src),
+def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src),
"and{q}\t{$src, %rax|%rax, $src}", []>;
let isTwoAddress = 1 in {
@@ -1086,9 +1114,11 @@ def AND64rr : RI<0x21, MRMDestReg,
"and{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86and_flag GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
def AND64rr_REV : RI<0x23, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"and{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
def AND64rm : RI<0x23, MRMSrcMem,
(outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
"and{q}\t{$src2, $dst|$dst, $src2}",
@@ -1129,9 +1159,11 @@ def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst),
"or{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86or_flag GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}",
@@ -1162,7 +1194,7 @@ def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src),
[(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
(implicit EFLAGS)]>;
-def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i32imm:$src),
+def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src),
"or{q}\t{$src, %rax|%rax, $src}", []>;
let isTwoAddress = 1 in {
@@ -1172,9 +1204,11 @@ def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst),
"xor{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86xor_flag GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
def XOR64rr_REV : RI<0x33, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"xor{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
def XOR64rm : RI<0x33, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"xor{q}\t{$src2, $dst|$dst, $src2}",
@@ -1205,7 +1239,7 @@ def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src),
[(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
(implicit EFLAGS)]>;
-def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src),
+def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i64i32imm:$src),
"xor{q}\t{$src, %rax|%rax, $src}", []>;
} // Defs = [EFLAGS]
@@ -1216,7 +1250,7 @@ def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src),
// Integer comparison
let Defs = [EFLAGS] in {
-def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i32imm:$src),
+def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i64i32imm:$src),
"test{q}\t{$src, %rax|%rax, $src}", []>;
let isCommutable = 1 in
def TEST64rr : RI<0x85, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
@@ -1238,7 +1272,7 @@ def TEST64mi32 : RIi32<0xF7, MRM0m, (outs),
i64immSExt32:$src2), 0))]>;
-def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i32imm:$src),
+def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i64i32imm:$src),
"cmp{q}\t{$src, %rax|%rax, $src}", []>;
def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"cmp{q}\t{$src2, $src1|$src1, $src2}",
@@ -1293,7 +1327,8 @@ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
def BT64ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
+ [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB,
+ REX_W;
// Note that these instructions don't need FastBTMem because that
// only applies when the other operand is in a register. When it's
// an immediate, bt is still fast.
@@ -1714,11 +1749,13 @@ def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
+let mayLoad = 1, mayStore = 1 in
def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
+let mayLoad = 1, mayStore = 1 in
def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
@@ -1730,7 +1767,7 @@ def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
"xchg{q}\t{$src, %rax|%rax, $src}", []>;
// Optimized codegen when the non-memory output is not used.
-let Defs = [EFLAGS] in {
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"lock\n\t"
@@ -1982,14 +2019,14 @@ def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
// defined after an extload.
def : Pat<(extloadi64i32 addr:$src),
(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
- x86_subreg_32bit)>;
+ sub_32bit)>;
// anyext. Define these to do an explicit zero-extend to
// avoid partial-register updates.
def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>;
def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
def : Pat<(i64 (anyext GR32:$src)),
- (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>;
+ (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
//===----------------------------------------------------------------------===//
// Some peepholes
@@ -2016,54 +2053,54 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm),
(SUBREG_TO_REG
(i64 0),
(AND32ri
- (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit),
+ (EXTRACT_SUBREG GR64:$src, sub_32bit),
(i32 (GetLo32XForm imm:$imm))),
- x86_subreg_32bit)>;
+ sub_32bit)>;
// r & (2^32-1) ==> movz
def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
- (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
+ (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
// r & (2^16-1) ==> movz
def : Pat<(and GR64:$src, 0xffff),
- (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>;
+ (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
// r & (2^8-1) ==> movz
def : Pat<(and GR64:$src, 0xff),
- (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>;
+ (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
// r & (2^8-1) ==> movz
def : Pat<(and GR32:$src1, 0xff),
- (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit))>,
+ (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
Requires<[In64BitMode]>;
// r & (2^8-1) ==> movz
def : Pat<(and GR16:$src1, 0xff),
- (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, x86_subreg_8bit)))>,
+ (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>,
Requires<[In64BitMode]>;
// sext_inreg patterns
def : Pat<(sext_inreg GR64:$src, i32),
- (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
+ (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
def : Pat<(sext_inreg GR64:$src, i16),
- (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>;
+ (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
def : Pat<(sext_inreg GR64:$src, i8),
- (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit))>;
+ (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
def : Pat<(sext_inreg GR32:$src, i8),
- (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit))>,
+ (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
Requires<[In64BitMode]>;
def : Pat<(sext_inreg GR16:$src, i8),
- (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)))>,
+ (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>,
Requires<[In64BitMode]>;
// trunc patterns
def : Pat<(i32 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)>;
+ (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
def : Pat<(i16 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)>;
+ (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
def : Pat<(i8 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)>;
+ (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
def : Pat<(i8 (trunc GR32:$src)),
- (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit)>,
+ (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
Requires<[In64BitMode]>;
def : Pat<(i8 (trunc GR16:$src)),
- (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)>,
+ (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
Requires<[In64BitMode]>;
// h-register tricks.
@@ -2079,67 +2116,67 @@ def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
(i64 0),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
- x86_subreg_8bit_hi)),
- x86_subreg_32bit)>;
+ sub_8bit_hi)),
+ sub_32bit)>;
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
(MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
GR32_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(srl GR16:$src, (i8 8)),
(EXTRACT_SUBREG
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi)),
- x86_subreg_16bit)>,
+ sub_8bit_hi)),
+ sub_16bit)>,
Requires<[In64BitMode]>;
def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
(SUBREG_TO_REG
(i64 0),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi)),
- x86_subreg_32bit)>;
+ sub_8bit_hi)),
+ sub_32bit)>;
def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
(SUBREG_TO_REG
(i64 0),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi)),
- x86_subreg_32bit)>;
+ sub_8bit_hi)),
+ sub_32bit)>;
// h-register extract and store.
def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
(MOV8mr_NOREX
addr:$dst,
(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
- x86_subreg_8bit_hi))>;
+ sub_8bit_hi))>;
def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
(MOV8mr_NOREX
addr:$dst,
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
(MOV8mr_NOREX
addr:$dst,
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In64BitMode]>;
// (shl x, 1) ==> (add x, x)
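All of the h-register patterns above implement one scalar identity; a hedged
sketch (HighByte is an illustrative name):

#include <cstdint>

// Extracting bits 8-15 is exactly what the sub_8bit_hi patterns select as a
// single movzbl from AH/BH/CH/DH -- hence the ABCD register classes and the
// _NOREX opcodes, since the high-byte registers cannot be encoded together
// with a REX prefix.
uint32_t HighByte(uint32_t X) {
  return (X >> 8) & 0xff;
}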
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index a21bfb9..34e12ca 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -744,17 +744,17 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
case X86::MOVZX32rr8:
case X86::MOVSX64rr8:
case X86::MOVZX64rr8:
- SubIdx = 1;
+ SubIdx = X86::sub_8bit;
break;
case X86::MOVSX32rr16:
case X86::MOVZX32rr16:
case X86::MOVSX64rr16:
case X86::MOVZX64rr16:
- SubIdx = 3;
+ SubIdx = X86::sub_16bit;
break;
case X86::MOVSX64rr32:
case X86::MOVZX64rr32:
- SubIdx = 4;
+ SubIdx = X86::sub_32bit;
break;
}
return true;
@@ -1065,7 +1065,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
const TargetRegisterInfo *TRI) const {
- DebugLoc DL = MBB.findDebugLoc(I);
+ DebugLoc DL = Orig->getDebugLoc();
if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
DestReg = TRI->getSubReg(DestReg, SubIdx);
@@ -1154,7 +1154,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
.addReg(leaInReg)
.addReg(Src, getKillRegState(isKill))
- .addImm(X86::SUBREG_16BIT);
+ .addImm(X86::sub_16bit);
MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
get(Opc), leaOutReg);
@@ -1198,7 +1198,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2)
.addReg(leaInReg2)
.addReg(Src2, getKillRegState(isKill2))
- .addImm(X86::SUBREG_16BIT);
+ .addImm(X86::sub_16bit);
addRegReg(MIB, leaInReg, true, leaInReg2, true);
}
if (LV && isKill2 && InsMI2)
@@ -1212,7 +1212,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
.addReg(leaOutReg, RegState::Kill)
- .addImm(X86::SUBREG_16BIT);
+ .addImm(X86::sub_16bit);
if (LV) {
// Update live variables
@@ -1901,8 +1901,8 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL = MBB.findDebugLoc(MI);
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
  // Determine if DestRC and SrcRC have a common superclass.
const TargetRegisterClass *CommonRC = DestRC;
@@ -1993,12 +1993,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
if (SrcReg != X86::EFLAGS)
return false;
if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
- BuildMI(MBB, MI, DL, get(X86::PUSHFQ64));
+ BuildMI(MBB, MI, DL, get(X86::PUSHF64));
BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
return true;
} else if (DestRC == &X86::GR32RegClass ||
DestRC == &X86::GR32_NOSPRegClass) {
- BuildMI(MBB, MI, DL, get(X86::PUSHFD));
+ BuildMI(MBB, MI, DL, get(X86::PUSHF32));
BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
return true;
}
@@ -2007,12 +2007,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
return false;
if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg);
- BuildMI(MBB, MI, DL, get(X86::POPFQ));
+ BuildMI(MBB, MI, DL, get(X86::POPF64));
return true;
} else if (SrcRC == &X86::GR32RegClass ||
DestRC == &X86::GR32_NOSPRegClass) {
BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg);
- BuildMI(MBB, MI, DL, get(X86::POPFD));
+ BuildMI(MBB, MI, DL, get(X86::POPF32));
return true;
}
}
@@ -2133,7 +2133,8 @@ static unsigned getStoreRegOpcode(unsigned SrcReg,
void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
@@ -2230,7 +2231,8 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC) const{
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
@@ -2256,7 +2258,8 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -2284,7 +2287,8 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
CalleeFrameSize += SlotSize;
BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
} else {
- storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass);
+ storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass,
+ &RI);
}
}
@@ -2294,7 +2298,8 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -2314,7 +2319,7 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (RegClass != &X86::VR128RegClass && !isWin64) {
BuildMI(MBB, MI, DL, get(Opc), Reg);
} else {
- loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
+ loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI);
}
}
return true;
@@ -2478,9 +2483,9 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
unsigned DstReg = NewMI->getOperand(0).getReg();
if (TargetRegisterInfo::isPhysicalRegister(DstReg))
NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
- 4/*x86_subreg_32bit*/));
+ X86::sub_32bit));
else
- NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/);
+ NewMI->getOperand(0).setSubReg(X86::sub_32bit);
}
return NewMI;
}
@@ -2526,9 +2531,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
switch (MI->getOpcode()) {
default: return NULL;
case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break;
- case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break;
- case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break;
- case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break;
}
// Check if it's safe to fold the load. If the size of the object is
// narrower than the load width, then it's not.
@@ -2595,9 +2600,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
switch (MI->getOpcode()) {
default: return NULL;
case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
- case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
- case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
- case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri8; break;
}
// Change to CMPXXri r, 0 first.
MI->setDesc(get(NewOpc));
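For context on why these folds switch from the CMPxxri forms to CMPxxri8:
the immediate is always 0 here, which fits the sign-extended 8-bit field, so
the replacement is semantically identical but shorter to encode. A hedged
comparison (Intel-syntax encodings in comments):

// cmp dword ptr [mem], 0
//   CMP32ri8 : 83 /7 ib  -> 1 immediate byte
//   CMP32ri  : 81 /7 id  -> 4 immediate bytes
// Both set EFLAGS identically when the immediate is 0.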
@@ -2805,16 +2810,22 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
switch (DataMI->getOpcode()) {
default: break;
case X86::CMP64ri32:
+ case X86::CMP64ri8:
case X86::CMP32ri:
+ case X86::CMP32ri8:
case X86::CMP16ri:
+ case X86::CMP16ri8:
case X86::CMP8ri: {
MachineOperand &MO0 = DataMI->getOperand(0);
MachineOperand &MO1 = DataMI->getOperand(1);
if (MO1.getImm() == 0) {
switch (DataMI->getOpcode()) {
default: break;
+ case X86::CMP64ri8:
case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
+ case X86::CMP32ri8:
case X86::CMP32ri: NewOpc = X86::TEST32rr; break;
+ case X86::CMP16ri8:
case X86::CMP16ri: NewOpc = X86::TEST16rr; break;
case X86::CMP8ri: NewOpc = X86::TEST8rr; break;
}
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index df99c7f..62d7c74 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -590,11 +590,13 @@ public:
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
SmallVectorImpl<MachineOperand> &Addr,
@@ -606,7 +608,8 @@ public:
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
SmallVectorImpl<MachineOperand> &Addr,
@@ -617,11 +620,13 @@ public:
virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
virtual
MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index a2754ea..0d59c42 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -195,15 +195,15 @@ def ptr_rc_nosp : PointerLikeRegClass<1>;
//
def X86MemAsmOperand : AsmOperandClass {
let Name = "Mem";
- let SuperClass = ?;
-}
-def X86AbsMemAsmOperand : AsmOperandClass {
- let Name = "AbsMem";
- let SuperClass = X86MemAsmOperand;
+ let SuperClasses = [];
}
def X86NoSegMemAsmOperand : AsmOperandClass {
let Name = "NoSegMem";
- let SuperClass = X86MemAsmOperand;
+ let SuperClasses = [X86MemAsmOperand];
+}
+def X86AbsMemAsmOperand : AsmOperandClass {
+ let Name = "AbsMem";
+ let SuperClasses = [X86NoSegMemAsmOperand];
}
class X86MemOperand<string printMethod> : Operand<iPTR> {
let PrintMethod = printMethod;
@@ -270,19 +270,49 @@ def SSECC : Operand<i8> {
let PrintMethod = "printSSECC";
}
-def ImmSExt8AsmOperand : AsmOperandClass {
- let Name = "ImmSExt8";
- let SuperClass = ImmAsmOperand;
+class ImmSExtAsmOperandClass : AsmOperandClass {
+ let SuperClasses = [ImmAsmOperand];
+ let RenderMethod = "addImmOperands";
+}
+
+// Sign-extended immediate classes. We don't need to define the full lattice
+// here because there is no instruction with an ambiguity between ImmSExti64i32
+// and ImmSExti32i8.
+//
+// The strange ranges come from the fact that the assembler always works with
+// 64-bit immediates, but for a 16-bit target value we want to accept both "-1"
+// (which will be a -1ULL), and "0xFFFF" (-1 in 16 bits).
+
+// [0, 0x7FFFFFFF] | [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti64i32AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti64i32";
+}
+
+// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti16i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti16i8";
+ let SuperClasses = [ImmSExti64i32AsmOperand];
+}
+
+// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti32i8";
+}
+
+// [0, 0x0000007F] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti64i8";
+ let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand, ImmSExti64i32AsmOperand];
}
// A couple of more descriptive operand definitions.
// 16-bits but only 8 bits are significant.
def i16i8imm : Operand<i16> {
- let ParserMatchClass = ImmSExt8AsmOperand;
+ let ParserMatchClass = ImmSExti16i8AsmOperand;
}
// 32-bits but only 8 bits are significant.
def i32i8imm : Operand<i32> {
- let ParserMatchClass = ImmSExt8AsmOperand;
+ let ParserMatchClass = ImmSExti32i8AsmOperand;
}
//===----------------------------------------------------------------------===//
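A hedged sketch of the acceptance test the ImmSExti32i8 ranges above spell
out (FitsSExti32i8 is an illustrative name, not an LLVM function):

#include <cstdint>

bool FitsSExti32i8(uint64_t V) {
  return ( V <= 0x000000000000007FULL) ||                 // [0, 0x7F]
         (0x00000000FFFFFF80ULL <= V &&
          V <= 0x00000000FFFFFFFFULL) ||  // e.g. 0xFFFFFFFF, -1 in 32 bits
         (0xFFFFFFFFFFFFFF80ULL <= V);    // e.g. -1 as a full 64-bit value
}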
@@ -542,8 +572,10 @@ let neverHasSideEffects = 1 in {
}
// Trap
-def INT3 : I<0xcc, RawFrm, (outs), (ins), "int\t3", []>;
-def INT : I<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>;
+def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
+def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", []>;
+// FIXME: need to make sure that "int $3" matches int3
+def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>;
def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize;
def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>;
@@ -693,6 +725,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TCRETURNri : I<0, Pseudo, (outs),
(ins GR32_TC:$dst, i32imm:$offset, variable_ops),
"#TC_RETURN $dst $offset", []>;
+ let mayLoad = 1 in
def TCRETURNmi : I<0, Pseudo, (outs),
(ins i32mem_TC:$dst, i32imm:$offset, variable_ops),
"#TC_RETURN $dst $offset", []>;
@@ -706,8 +739,16 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
"jmp{l}\t{*}$dst # TAILCALL",
[]>;
+ let mayLoad = 1 in
def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
"jmp{l}\t{*}$dst # TAILCALL", []>;
+
+ // FIXME: This is a hack so that MCInst lowering can preserve the TAILCALL
+ // marker on instructions, while still being able to relax.
+ let isCodeGenOnly = 1 in {
+ def TAILJMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
+ "jmp\t$dst # TAILCALL", []>;
+ }
}
//===----------------------------------------------------------------------===//
@@ -719,10 +760,12 @@ def LEAVE : I<0xC9, RawFrm,
def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
+let mayLoad = 1 in
def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
+let mayLoad = 1 in
def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
@@ -762,12 +805,14 @@ def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
}
let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in {
-def POPF : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
-def POPFD : I<0x9D, RawFrm, (outs), (ins), "popf{l}", []>;
+def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
+def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>,
+ Requires<[In32BitMode]>;
}
let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in {
-def PUSHF : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize;
-def PUSHFD : I<0x9C, RawFrm, (outs), (ins), "pushf{l}", []>;
+def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize;
+def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>,
+ Requires<[In32BitMode]>;
}
let isTwoAddress = 1 in // GR32 = bswap GR32
@@ -867,7 +912,7 @@ def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>,
let Defs = [RAX, RCX, RDX] in
def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
-let isBarrier = 1, hasCtrlDep = 1 in {
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
}
@@ -966,36 +1011,47 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[(store (i32 imm:$src), addr:$dst)]>;
-def MOV8o8a : Ii8 <0xA0, RawFrm, (outs), (ins offset8:$src),
+/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a
+/// 32-bit absolute memory offset, not a PC-relative one. These are only
+/// valid in x86-32 mode.
+def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
"mov{b}\t{$src, %al|%al, $src}", []>;
-def MOV16o16a : Ii16 <0xA1, RawFrm, (outs), (ins offset16:$src),
+def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src),
"mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src),
"mov{l}\t{$src, %eax|%eax, $src}", []>;
-
-def MOV8ao8 : Ii8 <0xA2, RawFrm, (outs offset8:$dst), (ins),
+def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins),
"mov{b}\t{%al, $dst|$dst, %al}", []>;
-def MOV16ao16 : Ii16 <0xA3, RawFrm, (outs offset16:$dst), (ins),
+def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
"mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize;
def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
"mov{l}\t{%eax, $dst|$dst, %eax}", []>;
-
+
// Moves to and from segment registers
def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>;
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>;
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>;
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>;
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+let isCodeGenOnly = 1 in {
def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
"mov{b}\t{$src, $dst|$dst, $src}", []>;
def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>;
+}
let canFoldAsLoad = 1, isReMaterializable = 1 in {
def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
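A hedged illustration of the moffs forms above (AT&T syntax, 32-bit mode,
shown in comments -- these encodings carry the absolute offset directly and
have no ModRM byte):

//   movb 0x12345678, %al    // A0 + 4-byte absolute offset (MOV8o8a)
//   movb %al, 0x12345678    // A2 + 4-byte absolute offset (MOV8ao8)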
@@ -1059,10 +1115,10 @@ def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
// Moves to and from control registers
-def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG_32:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_32:$dst), (ins GR32:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
//===----------------------------------------------------------------------===//
// Fixed-Register Multiplication and Division Instructions...
@@ -1746,6 +1802,7 @@ def AND32rr : I<0x21, MRMDestReg,
// AND instructions with the destination register in REG and the source register
// in R/M. Included for the disassembler.
+let isCodeGenOnly = 1 in {
def AND8rr_REV : I<0x22, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"and{b}\t{$src2, $dst|$dst, $src2}", []>;
def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst),
@@ -1754,6 +1811,7 @@ def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst),
def AND32rr_REV : I<0x23, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"and{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
def AND8rm : I<0x22, MRMSrcMem,
(outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
@@ -1872,6 +1930,7 @@ def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst),
// OR instructions with the destination register in REG and the source register
// in R/M. Included for the disassembler.
+let isCodeGenOnly = 1 in {
def OR8rr_REV : I<0x0A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"or{b}\t{$src2, $dst|$dst, $src2}", []>;
def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst),
@@ -1880,6 +1939,7 @@ def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst),
def OR32rr_REV : I<0x0B, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"or{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
def OR8rm : I<0x0A, MRMSrcMem, (outs GR8 :$dst),
(ins GR8 :$src1, i8mem :$src2),
@@ -1988,6 +2048,7 @@ let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
// XOR instructions with the destination register in REG and the source register
// in R/M. Included for the disassembler.
+let isCodeGenOnly = 1 in {
def XOR8rr_REV : I<0x32, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"xor{b}\t{$src2, $dst|$dst, $src2}", []>;
def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst),
@@ -1996,6 +2057,7 @@ def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst),
def XOR32rr_REV : I<0x33, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"xor{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
def XOR8rm : I<0x32, MRMSrcMem,
(outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2),
@@ -2793,6 +2855,7 @@ def ADC32rr : I<0x11, MRMDestReg, (outs GR32:$dst),
[(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>;
}
+let isCodeGenOnly = 1 in {
def ADC8rr_REV : I<0x12, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"adc{b}\t{$src2, $dst|$dst, $src2}", []>;
def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst),
@@ -2801,6 +2864,7 @@ def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst),
def ADC32rr_REV : I<0x13, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"adc{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
def ADC8rm : I<0x12, MRMSrcMem , (outs GR8:$dst),
(ins GR8:$src1, i8mem:$src2),
@@ -2888,6 +2952,7 @@ def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
[(set GR32:$dst, EFLAGS,
(X86sub_flag GR32:$src1, GR32:$src2))]>;
+let isCodeGenOnly = 1 in {
def SUB8rr_REV : I<0x2A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"sub{b}\t{$src2, $dst|$dst, $src2}", []>;
def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst),
@@ -2896,6 +2961,7 @@ def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst),
def SUB32rr_REV : I<0x2B, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"sub{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
// Register-Memory Subtraction
def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst),
@@ -3039,6 +3105,7 @@ let isTwoAddress = 0 in {
"sbb{l}\t{$src, %eax|%eax, $src}", []>;
}
+let isCodeGenOnly = 1 in {
def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"sbb{b}\t{$src2, $dst|$dst, $src2}", []>;
def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst),
@@ -3047,6 +3114,7 @@ def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst),
def SBB32rr_REV : I<0x1B, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"sbb{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
def SBB8rm : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2),
"sbb{b}\t{$src2, $dst|$dst, $src2}",
@@ -3864,12 +3932,14 @@ def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+let mayLoad = 1, mayStore = 1 in {
def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
"xadd{b}\t{$src, $dst|$dst, $src}", []>, TB;
def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+}
def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
"cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB;
@@ -3878,12 +3948,14 @@ def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+let mayLoad = 1, mayStore = 1 in {
def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
"cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB;
def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+}
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
@@ -3891,7 +3963,7 @@ def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
// Optimized codegen when the non-memory output is not used.
// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
-let Defs = [EFLAGS] in {
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
"lock\n\t"
"add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
@@ -4453,7 +4525,7 @@ def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
// Except for i16 -> i32 since isel expects i16 ops to be promoted to i32.
def : Pat<(i32 (anyext GR16:$src)),
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
//===----------------------------------------------------------------------===//
@@ -4473,81 +4545,81 @@ def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
// r & (2^16-1) ==> movz
def : Pat<(and GR32:$src1, 0xffff),
- (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit))>;
+ (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
// r & (2^8-1) ==> movz
def : Pat<(and GR32:$src1, 0xff),
(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
GR32_ABCD)),
- x86_subreg_8bit))>,
+ sub_8bit))>,
Requires<[In32BitMode]>;
// r & (2^8-1) ==> movz
def : Pat<(and GR16:$src1, 0xff),
(MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1,
GR16_ABCD)),
- x86_subreg_8bit))>,
+ sub_8bit))>,
Requires<[In32BitMode]>;
// sext_inreg patterns
def : Pat<(sext_inreg GR32:$src, i16),
- (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>;
+ (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
def : Pat<(sext_inreg GR32:$src, i8),
(MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
GR32_ABCD)),
- x86_subreg_8bit))>,
+ sub_8bit))>,
Requires<[In32BitMode]>;
def : Pat<(sext_inreg GR16:$src, i8),
(MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
GR16_ABCD)),
- x86_subreg_8bit))>,
+ sub_8bit))>,
Requires<[In32BitMode]>;
// trunc patterns
def : Pat<(i16 (trunc GR32:$src)),
- (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)>;
+ (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
def : Pat<(i8 (trunc GR32:$src)),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- x86_subreg_8bit)>,
+ sub_8bit)>,
Requires<[In32BitMode]>;
def : Pat<(i8 (trunc GR16:$src)),
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit)>,
+ sub_8bit)>,
Requires<[In32BitMode]>;
// h-register tricks
def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi)>,
+ sub_8bit_hi)>,
Requires<[In32BitMode]>;
def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- x86_subreg_8bit_hi)>,
+ sub_8bit_hi)>,
Requires<[In32BitMode]>;
def : Pat<(srl GR16:$src, (i8 8)),
(EXTRACT_SUBREG
(MOVZX32rr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi)),
- x86_subreg_16bit)>,
+ sub_8bit_hi)),
+ sub_16bit)>,
Requires<[In32BitMode]>;
def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
GR16_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In32BitMode]>;
def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
GR16_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In32BitMode]>;
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
GR32_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In32BitMode]>;
def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
GR32_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In32BitMode]>;
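
The h-register patterns above avoid an explicit shift instruction: the second byte of an x86 ABCD register is directly addressable as AH/BH/CH/DH, so extracting bits 8-15 is just a subregister rename through sub_8bit_hi. A minimal standalone sketch (plain C++, not LLVM code) of the computation these patterns cover:

    // (x >> 8) & 0xff selects the high byte of the low 16 bits; when x lives
    // in EAX/EBX/ECX/EDX this is simply a read of AH/BH/CH/DH, no shift needed.
    #include <cassert>
    #include <cstdint>

    static uint8_t highByte(uint32_t x) { return uint8_t((x >> 8) & 0xff); }

    int main() {
      assert(highByte(0x12345678) == 0x56);
      return 0;
    }
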
// (shl x, 1) ==> (add x, x)
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 744af50..0952fc8 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -117,9 +117,6 @@ def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
-def MMX_MOVQ64gmr : MMXRI<0x7F, MRMDestMem, (outs),
- (ins i64mem:$dst, VR64:$src),
- "movq\t{$src, $dst|$dst, $src}", []>;
let neverHasSideEffects = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
@@ -133,10 +130,10 @@ let neverHasSideEffects = 1 in
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
-def MMX_MOVD64rrv164 : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst,
- (v1i64 (scalar_to_vector GR64:$src)))]>;
+def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (v1i64 (scalar_to_vector GR64:$src)))]>;
let neverHasSideEffects = 1 in
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 2129580..5580ba7 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -117,17 +117,17 @@ def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
-def alignedloadfsf32 : PatFrag<(ops node:$ptr),
+def alignedloadfsf32 : PatFrag<(ops node:$ptr),
(f32 (alignedload node:$ptr))>;
-def alignedloadfsf64 : PatFrag<(ops node:$ptr),
+def alignedloadfsf64 : PatFrag<(ops node:$ptr),
(f64 (alignedload node:$ptr))>;
-def alignedloadv4f32 : PatFrag<(ops node:$ptr),
+def alignedloadv4f32 : PatFrag<(ops node:$ptr),
(v4f32 (alignedload node:$ptr))>;
-def alignedloadv2f64 : PatFrag<(ops node:$ptr),
+def alignedloadv2f64 : PatFrag<(ops node:$ptr),
(v2f64 (alignedload node:$ptr))>;
-def alignedloadv4i32 : PatFrag<(ops node:$ptr),
+def alignedloadv4i32 : PatFrag<(ops node:$ptr),
(v4i32 (alignedload node:$ptr))>;
-def alignedloadv2i64 : PatFrag<(ops node:$ptr),
+def alignedloadv2i64 : PatFrag<(ops node:$ptr),
(v2i64 (alignedload node:$ptr))>;
// Like 'load', but uses special alignment checks suitable for use in
@@ -387,11 +387,11 @@ def MOVSSrr : SSI<0x10, MRMSrcReg,
let AddedComplexity = 15 in
def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
(MOVSSrr (v4f32 VR128:$src1),
- (EXTRACT_SUBREG (v4f32 VR128:$src2), x86_subreg_ss))>;
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, x86_subreg_ss)>;
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
// Loading from memory automatically zeroing upper bits.
let canFoldAsLoad = 1, isReMaterializable = 1 in
@@ -403,11 +403,11 @@ def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
// with SUBREG_TO_REG.
let AddedComplexity = 20 in {
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
}
// Store scalar value to memory.
@@ -419,7 +419,7 @@ def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
(MOVSSmr addr:$dst,
- (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
// Conversion instructions
def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
@@ -449,7 +449,7 @@ def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
[(set GR32:$dst, (int_x86_sse_cvtss2si
(load addr:$src)))]>;
-// Match intrinisics which expect MM and XMM operand(s).
+// Match intrinsics which expect MM and XMM operand(s).
def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"cvtps2pi\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>;
@@ -509,6 +509,17 @@ let mayLoad = 1 in
def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
(outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc),
"cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
+
+ // Accept explicit immediate argument form instead of comparison code.
+let isAsmParserOnly = 1 in {
+ def CMPSSrr_alt : SSIi8<0xC2, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2),
+ "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+let mayLoad = 1 in
+ def CMPSSrm_alt : SSIi8<0xC2, MRMSrcMem,
+ (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2),
+ "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+}
}
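
The _alt defs above let the assembler accept a raw predicate immediate in place of a condition-code mnemonic suffix. A small sketch of the standard SSE predicate encoding they rely on (imm values 0-7; illustrative C++, not LLVM code):

    #include <cstdio>

    // Predicate names in encoding order: "cmpss $1, %xmm1, %xmm0" is the
    // same instruction as "cmpltss %xmm1, %xmm0".
    static const char *SSECC[8] = {"eq", "lt", "le", "unord",
                                   "neq", "nlt", "nle", "ord"};

    int main() {
      for (int i = 0; i < 8; ++i)
        std::printf("cmp%sss  ==  cmpss $%d\n", SSECC[i], i);
      return 0;
    }
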
let Defs = [EFLAGS] in {
@@ -518,25 +529,25 @@ def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2),
def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2),
"ucomiss\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86cmp FR32:$src1, (loadf32 addr:$src2)))]>;
-
+
def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"comiss\t{$src2, $src1|$src1, $src2}", []>;
def COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"comiss\t{$src2, $src1|$src1, $src2}", []>;
-
+
} // Defs = [EFLAGS]
// Aliases to match intrinsics which expect XMM operand(s).
let Constraints = "$src1 = $dst" in {
def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst),
+ (outs VR128:$dst),
(ins VR128:$src1, VR128:$src, SSECC:$cc),
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cmp_ss
+ [(set VR128:$dst, (int_x86_sse_cmp_ss
VR128:$src1,
VR128:$src, imm:$cc))]>;
def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst),
+ (outs VR128:$dst),
(ins VR128:$src1, f32mem:$src, SSECC:$cc),
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
@@ -1009,6 +1020,16 @@ let Constraints = "$src1 = $dst" in {
"cmp${cc}ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
(memop addr:$src), imm:$cc))]>;
+
+ // Accept explicit immediate argument form instead of comparison code.
+let isAsmParserOnly = 1 in {
+ def CMPPSrri_alt : PSIi8<0xC2, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2),
+ "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+ def CMPPSrmi_alt : PSIi8<0xC2, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2),
+ "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+}
}
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
(CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
@@ -1102,7 +1123,8 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
}
// Load, store, and memory fence
-def SFENCE : PSI<0xAE, MRM7r, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>;
+def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
+ TB, Requires<[HasSSE1]>;
// MXCSR register
def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
@@ -1130,7 +1152,7 @@ def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
//===---------------------------------------------------------------------===//
// SSE2 Instructions
@@ -1152,11 +1174,11 @@ def MOVSDrr : SDI<0x10, MRMSrcReg,
let AddedComplexity = 15 in
def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
(MOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), x86_subreg_sd))>;
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, x86_subreg_sd)>;
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
// Loading from memory automatically zeroing upper bits.
let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in
@@ -1168,15 +1190,15 @@ def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
// with SUBREG_TO_REG.
let AddedComplexity = 20 in {
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
}
// Store scalar value to memory.
@@ -1188,7 +1210,7 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
addr:$dst),
(MOVSDmr addr:$dst,
- (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>;
+ (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
// Conversion instructions
def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
@@ -1255,7 +1277,7 @@ def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
[(set GR32:$dst, (int_x86_sse2_cvtsd2si
(load addr:$src)))]>;
-// Match intrinisics which expect MM and XMM operand(s).
+// Match intrinsics which expect MM and XMM operand(s).
def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"cvtpd2pi\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>;
@@ -1297,6 +1319,17 @@ let mayLoad = 1 in
def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
(outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc),
"cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
+
+ // Accept explicit immediate argument form instead of comparison code.
+let isAsmParserOnly = 1 in {
+ def CMPSDrr_alt : SDIi8<0xC2, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2),
+ "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+let mayLoad = 1 in
+ def CMPSDrm_alt : SDIi8<0xC2, MRMSrcMem,
+ (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2),
+ "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+}
}
let Defs = [EFLAGS] in {
@@ -1311,13 +1344,13 @@ def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2),
// Aliases to match intrinsics which expect XMM operand(s).
let Constraints = "$src1 = $dst" in {
def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst),
+ (outs VR128:$dst),
(ins VR128:$src1, VR128:$src, SSECC:$cc),
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
VR128:$src, imm:$cc))]>;
def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst),
+ (outs VR128:$dst),
(ins VR128:$src1, f64mem:$src, SSECC:$cc),
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
@@ -1655,7 +1688,7 @@ def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
+ [(set VR128:$dst,
(int_x86_sse2_cvttps2dq VR128:$src))]>,
XS, Requires<[HasSSE2]>;
def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -1890,6 +1923,16 @@ let Constraints = "$src1 = $dst" in {
"cmp${cc}pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
(memop addr:$src), imm:$cc))]>;
+
+ // Accept explicit immediate argument form instead of comparison code.
+let isAsmParserOnly = 1 in {
+ def CMPPDrri_alt : PDIi8<0xC2, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2),
+ "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+ def CMPPDrmi_alt : PDIi8<0xC2, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2),
+ "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+}
}
def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
(CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
@@ -1980,24 +2023,24 @@ let Constraints = "$src1 = $dst" in {
multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
bit Commutable = 0> {
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
let isCommutable = Commutable;
}
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1,
- (bitconvert (memopv2i64
+ (bitconvert (memopv2i64
addr:$src2))))]>;
}
multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr,
Intrinsic IntId, Intrinsic IntId2> {
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
@@ -2006,7 +2049,7 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1,
(bitconvert (memopv2i64 addr:$src2))))]>;
- def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst),
+ def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst),
(ins VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
@@ -2015,13 +2058,13 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
/// PDI_binop_rm - Simple SSE2 binary operator.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, bit Commutable = 0> {
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
let isCommutable = Commutable;
}
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpVT (OpNode VR128:$src1,
@@ -2416,6 +2459,10 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
"mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+// Pause. This "instruction" is encoded as "rep; nop", so even though it
+// was introduced with SSE2, it's backward compatible.
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
+
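
A quick illustration of the encoding the comment describes (assumed bytes, easily checked against any x86 reference):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // PAUSE is byte-for-byte REP (0xF3) followed by NOP (0x90); CPUs that
      // predate SSE2 execute it as a plain NOP, so no predicate is needed.
      const uint8_t PauseBytes[] = {0xF3, 0x90};
      std::printf("pause encodes as %02x %02x\n", PauseBytes[0], PauseBytes[1]);
      return 0;
    }
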
//TODO: custom lower this so as to never even generate the noop
def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
(i8 0)), (NOOP)>;
@@ -2462,7 +2509,7 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
(iPTR 0))), addr:$dst)]>;
def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
- (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>;
+ (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
@@ -2903,7 +2950,7 @@ defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw",
defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw",
int_x86_ssse3_pmul_hr_sw,
int_x86_ssse3_pmul_hr_sw_128, 1>;
-
+
defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb",
int_x86_ssse3_pshuf_b,
int_x86_ssse3_pshuf_b_128>;
@@ -3042,10 +3089,10 @@ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
(MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
(MOVSSrr (v4f32 (V_SET0PS)),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>;
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
(MOVSSrr (v4i32 (V_SET0PI)),
- (EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>;
+ (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
}
// Splat v2f64 / v2i64
@@ -3181,17 +3228,17 @@ let AddedComplexity = 15 in {
// Setting the lowest element in the vector.
def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
(MOVSSrr (v4i32 VR128:$src1),
- (EXTRACT_SUBREG (v4i32 VR128:$src2), x86_subreg_ss))>;
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
(MOVSDrr (v2i64 VR128:$src1),
- (EXTRACT_SUBREG (v2i64 VR128:$src2), x86_subreg_sd))>;
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
// vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>,
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>,
Requires<[HasSSE2]>;
def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>,
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>,
Requires<[HasSSE2]>;
}
@@ -3464,14 +3511,14 @@ let Constraints = "$src1 = $dst" in {
let Constraints = "$src1 = $dst" in {
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, bit Commutable = 0> {
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
OpSize {
let isCommutable = Commutable;
}
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpNode VR128:$src1,
@@ -3949,15 +3996,15 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
"#PCMPESTRM128rr PSEUDO!",
- [(set VR128:$dst,
- (int_x86_sse42_pcmpestrm128
+ [(set VR128:$dst,
+ (int_x86_sse42_pcmpestrm128
VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize;
def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"#PCMPESTRM128rm PSEUDO!",
- [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
- VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>,
+ [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
+ VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>,
OpSize;
}
@@ -3972,7 +4019,7 @@ def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
let Defs = [ECX, EFLAGS] in {
multiclass SS42AI_pcmpistri<Intrinsic IntId128> {
- def rr : SS42AI<0x63, MRMSrcReg, (outs),
+ def rr : SS42AI<0x63, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
[(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
@@ -4003,7 +4050,7 @@ let Uses = [EAX, EDX] in {
def rm : SS42AI<0x61, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
- [(set ECX,
+ [(set ECX,
(IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)),
(implicit EFLAGS)]>, OpSize;
}
@@ -4081,16 +4128,15 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
OpSize;
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
+ (ins VR128:$src1, i8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
OpSize;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src1, i32i8imm:$src2),
+ (ins i128mem:$src1, i8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
imm:$src2))]>,
OpSize;
-
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index a3e04b0..98975ea 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -37,7 +37,6 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -145,6 +144,19 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
case X86::XMM7: case X86::XMM15: case X86::MM7:
return 7;
+ case X86::ES:
+ return 0;
+ case X86::CS:
+ return 1;
+ case X86::SS:
+ return 2;
+ case X86::DS:
+ return 3;
+ case X86::FS:
+ return 4;
+ case X86::GS:
+ return 5;
+
default:
assert(isVirtualRegister(RegNo) && "Unknown physical register!");
llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
@@ -158,8 +170,7 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
unsigned SubIdx) const {
switch (SubIdx) {
default: return 0;
- case 1:
- // 8-bit
+ case X86::sub_8bit:
if (B == &X86::GR8RegClass) {
if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8)
return A;
@@ -191,12 +202,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return &X86::GR16_NOREXRegClass;
else if (A == &X86::GR16_ABCDRegClass)
return &X86::GR16_ABCDRegClass;
- } else if (B == &X86::FR32RegClass) {
- return A;
}
break;
- case 2:
- // 8-bit hi
+ case X86::sub_8bit_hi:
if (B == &X86::GR8_ABCD_HRegClass) {
if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
A == &X86::GR64_NOREXRegClass ||
@@ -209,12 +217,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
A == &X86::GR16_NOREXRegClass)
return &X86::GR16_ABCDRegClass;
- } else if (B == &X86::FR64RegClass) {
- return A;
}
break;
- case 3:
- // 16-bit
+ case X86::sub_16bit:
if (B == &X86::GR16RegClass) {
if (A->getSize() == 4 || A->getSize() == 8)
return A;
@@ -238,12 +243,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return &X86::GR32_NOREXRegClass;
else if (A == &X86::GR32_ABCDRegClass)
return &X86::GR64_ABCDRegClass;
- } else if (B == &X86::VR128RegClass) {
- return A;
}
break;
- case 4:
- // 32-bit
+ case X86::sub_32bit:
if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) {
if (A->getSize() == 8)
return A;
@@ -261,6 +263,18 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return &X86::GR64_ABCDRegClass;
}
break;
+ case X86::sub_ss:
+ if (B == &X86::FR32RegClass)
+ return A;
+ break;
+ case X86::sub_sd:
+ if (B == &X86::FR64RegClass)
+ return A;
+ break;
+ case X86::sub_xmm:
+ if (B == &X86::VR128RegClass)
+ return A;
+ break;
}
return 0;
}
@@ -518,6 +532,30 @@ X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
return Offset;
}
+static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::SUB64ri8;
+ return X86::SUB64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::SUB32ri8;
+ return X86::SUB32ri;
+ }
+}
+
+static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::ADD64ri8;
+ return X86::ADD64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::ADD32ri8;
+ return X86::ADD32ri;
+ }
+}
+
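
The two helpers centralize a choice previously repeated at each call site: an immediate that fits in a signed byte gets the short ri8 encoding, anything wider falls back to the 32-bit-immediate form, and the old ad-hoc `Amount < 128` tests are replaced by a proper signed-range check. A standalone sketch of the rule (mock opcode names as strings, not the LLVM enums):

    #include <cassert>
    #include <cstdint>
    #include <string>

    static bool fitsInt8(int64_t Imm) { return Imm >= -128 && Imm <= 127; }

    static std::string subOpcode(bool is64Bit, int64_t Imm) {
      if (is64Bit)
        return fitsInt8(Imm) ? "SUB64ri8" : "SUB64ri32";
      return fitsInt8(Imm) ? "SUB32ri8" : "SUB32ri";
    }

    int main() {
      assert(subOpcode(false, 8)   == "SUB32ri8");  // small frame adjustment
      assert(subOpcode(false, 400) == "SUB32ri");   // needs a wide immediate
      assert(subOpcode(true, -128) == "SUB64ri8");  // negative but still i8
      return 0;
    }
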
void X86RegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
@@ -537,7 +575,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineInstr *New = 0;
if (Old->getOpcode() == getCallFrameSetupOpcode()) {
New = BuildMI(MF, Old->getDebugLoc(),
- TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri),
+ TII.get(getSUBriOpcode(Is64Bit, Amount)),
StackPtr)
.addReg(StackPtr)
.addImm(Amount);
@@ -549,9 +587,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
Amount -= CalleeAmt;
if (Amount) {
- unsigned Opc = (Amount < 128) ?
- (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
- (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri);
+ unsigned Opc = getADDriOpcode(Is64Bit, Amount);
New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr)
.addReg(StackPtr)
.addImm(Amount);
@@ -571,9 +607,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// something off the stack pointer, add it back. We do this until we have
// more advanced stack pointer tracking ability.
if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
- unsigned Opc = (CalleeAmt < 128) ?
- (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
- (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
+ unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
MachineInstr *Old = I;
MachineInstr *New =
BuildMI(MF, Old->getDebugLoc(), TII.get(Opc),
@@ -691,13 +725,9 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
const TargetInstrInfo &TII) {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
- unsigned Opc = isSub
- ? ((Offset < 128) ?
- (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
- (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri))
- : ((Offset < 128) ?
- (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
- (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri));
+ unsigned Opc = isSub ?
+ getSUBriOpcode(Is64Bit, Offset) :
+ getADDriOpcode(Is64Bit, Offset);
uint64_t Chunk = (1LL << 31) - 1;
DebugLoc DL = MBB.findDebugLoc(MBBI);
@@ -899,7 +929,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
!needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->hasCalls() && // No calls.
+ !MFI->adjustsStack() && // No calls.
!Subtarget->isTargetWin64()) { // Win64 has no Red Zone
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
@@ -917,7 +947,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
// size is bigger than the callers.
if (TailCallReturnAddrDelta < 0) {
MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri),
+ BuildMI(MBB, MBBI, DL,
+ TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
StackPtr)
.addReg(StackPtr)
.addImm(-TailCallReturnAddrDelta);
@@ -1307,7 +1338,7 @@ X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const {
// Calculate the number of bytes used to store the return address.
int stackGrowth = (Is64Bit ? -8 : -4);
- // Initial state of the frame pointer is esp+4.
+ // Initial state of the frame pointer is esp+stackGrowth.
MachineLocation Dst(MachineLocation::VirtualFP);
MachineLocation Src(StackPtr, stackGrowth);
Moves.push_back(MachineMove(0, Dst, Src));
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index ac96c4c..d0b82e2 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -30,16 +30,6 @@ namespace N86 {
};
}
-namespace X86 {
- /// SubregIndex - The index of various sized subregister classes. Note that
- /// these indices must be kept in sync with the class indices in the
- /// X86RegisterInfo.td file.
- enum SubregIndex {
- SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4,
- SUBREG_SS = 1, SUBREG_SD = 2, SUBREG_XMM = 3
- };
-}
-
/// DWARFFlavour - Flavour of dwarf regnumbers
///
namespace DWARFFlavour {
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 49a6ca0..91cfaa9 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -18,6 +18,17 @@
//
let Namespace = "X86" in {
+ // Subregister indices.
+ def sub_8bit : SubRegIndex;
+ def sub_8bit_hi : SubRegIndex;
+ def sub_16bit : SubRegIndex;
+ def sub_32bit : SubRegIndex;
+
+ def sub_ss : SubRegIndex;
+ def sub_sd : SubRegIndex;
+ def sub_xmm : SubRegIndex;
+
+
// In the register alias definitions below, we define which registers alias
// which others. We only specify which registers the small registers alias,
// because the register file generator is smart enough to figure out that
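
These named SubRegIndex defs replace the colliding numeric scheme deleted elsewhere in this patch, where index 1 meant sub_8bit for general-purpose registers but sub_ss for XMM. An illustrative sketch of the collision (the enumerator values here are made up; TableGen assigns the real ones):

    // Old scheme: one number, two meanings, disambiguated only by context.
    enum OldIdx { SUBREG_8BIT = 1, SUBREG_SS = 1 };
    // New scheme: one name per meaning, unique values.
    enum NewIdx { sub_8bit = 1, sub_8bit_hi, sub_16bit, sub_32bit,
                  sub_ss, sub_sd, sub_xmm };

    static_assert(SUBREG_8BIT == SUBREG_SS, "old indices collided");
    static_assert(sub_8bit != sub_ss, "new indices are distinct");

    int main() { return 0; }
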
@@ -57,17 +68,22 @@ let Namespace = "X86" in {
def BH : Register<"bh">, DwarfRegNum<[3, 3, 3]>;
// 16-bit registers
+ let SubRegIndices = [sub_8bit, sub_8bit_hi] in {
def AX : RegisterWithSubRegs<"ax", [AL,AH]>, DwarfRegNum<[0, 0, 0]>;
def DX : RegisterWithSubRegs<"dx", [DL,DH]>, DwarfRegNum<[1, 2, 2]>;
def CX : RegisterWithSubRegs<"cx", [CL,CH]>, DwarfRegNum<[2, 1, 1]>;
def BX : RegisterWithSubRegs<"bx", [BL,BH]>, DwarfRegNum<[3, 3, 3]>;
+ }
+ let SubRegIndices = [sub_8bit] in {
def SI : RegisterWithSubRegs<"si", [SIL]>, DwarfRegNum<[4, 6, 6]>;
def DI : RegisterWithSubRegs<"di", [DIL]>, DwarfRegNum<[5, 7, 7]>;
def BP : RegisterWithSubRegs<"bp", [BPL]>, DwarfRegNum<[6, 4, 5]>;
def SP : RegisterWithSubRegs<"sp", [SPL]>, DwarfRegNum<[7, 5, 4]>;
+ }
def IP : Register<"ip">, DwarfRegNum<[16]>;
// X86-64 only
+ let SubRegIndices = [sub_8bit] in {
def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>;
def R9W : RegisterWithSubRegs<"r9w", [R9B]>, DwarfRegNum<[9, -2, -2]>;
def R10W : RegisterWithSubRegs<"r10w", [R10B]>, DwarfRegNum<[10, -2, -2]>;
@@ -76,8 +92,9 @@ let Namespace = "X86" in {
def R13W : RegisterWithSubRegs<"r13w", [R13B]>, DwarfRegNum<[13, -2, -2]>;
def R14W : RegisterWithSubRegs<"r14w", [R14B]>, DwarfRegNum<[14, -2, -2]>;
def R15W : RegisterWithSubRegs<"r15w", [R15B]>, DwarfRegNum<[15, -2, -2]>;
-
+ }
// 32-bit registers
+ let SubRegIndices = [sub_16bit] in {
def EAX : RegisterWithSubRegs<"eax", [AX]>, DwarfRegNum<[0, 0, 0]>;
def EDX : RegisterWithSubRegs<"edx", [DX]>, DwarfRegNum<[1, 2, 2]>;
def ECX : RegisterWithSubRegs<"ecx", [CX]>, DwarfRegNum<[2, 1, 1]>;
@@ -97,8 +114,10 @@ let Namespace = "X86" in {
def R13D : RegisterWithSubRegs<"r13d", [R13W]>, DwarfRegNum<[13, -2, -2]>;
def R14D : RegisterWithSubRegs<"r14d", [R14W]>, DwarfRegNum<[14, -2, -2]>;
def R15D : RegisterWithSubRegs<"r15d", [R15W]>, DwarfRegNum<[15, -2, -2]>;
+ }
// 64-bit registers, X86-64 only
+ let SubRegIndices = [sub_32bit] in {
def RAX : RegisterWithSubRegs<"rax", [EAX]>, DwarfRegNum<[0, -2, -2]>;
def RDX : RegisterWithSubRegs<"rdx", [EDX]>, DwarfRegNum<[1, -2, -2]>;
def RCX : RegisterWithSubRegs<"rcx", [ECX]>, DwarfRegNum<[2, -2, -2]>;
@@ -117,6 +136,7 @@ let Namespace = "X86" in {
def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>;
def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>;
def RIP : RegisterWithSubRegs<"rip", [EIP]>, DwarfRegNum<[16, -2, -2]>;
+ }
// MMX Registers. These are actually aliased to ST0 .. ST7
def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>;
@@ -137,7 +157,9 @@ let Namespace = "X86" in {
def FP5 : Register<"fp5">;
def FP6 : Register<"fp6">;
- // XMM Registers, used by the various SSE instruction set extensions
+ // XMM Registers, used by the various SSE instruction set extensions.
+ // The sub_ss and sub_sd subregs are the same registers viewed through another regclass.
+ let CompositeIndices = [(sub_ss), (sub_sd)] in {
def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>;
def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>;
def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>;
@@ -156,8 +178,10 @@ let Namespace = "X86" in {
def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>;
def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
+ }
// YMM Registers, used by AVX instructions
+ let SubRegIndices = [sub_xmm] in {
def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>;
def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>;
def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>;
@@ -174,6 +198,7 @@ let Namespace = "X86" in {
def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegNum<[30, -2, -2]>;
def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegNum<[31, -2, -2]>;
def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegNum<[32, -2, -2]>;
+ }
// Floating point stack registers
def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>;
@@ -207,106 +232,19 @@ let Namespace = "X86" in {
def DR7 : Register<"dr7">;
// Condition registers
- def ECR0 : Register<"ecr0">;
- def ECR1 : Register<"ecr1">;
- def ECR2 : Register<"ecr2">;
- def ECR3 : Register<"ecr3">;
- def ECR4 : Register<"ecr4">;
- def ECR5 : Register<"ecr5">;
- def ECR6 : Register<"ecr6">;
- def ECR7 : Register<"ecr7">;
-
- def RCR0 : Register<"rcr0">;
- def RCR1 : Register<"rcr1">;
- def RCR2 : Register<"rcr2">;
- def RCR3 : Register<"rcr3">;
- def RCR4 : Register<"rcr4">;
- def RCR5 : Register<"rcr5">;
- def RCR6 : Register<"rcr6">;
- def RCR7 : Register<"rcr7">;
- def RCR8 : Register<"rcr8">;
+ def CR0 : Register<"cr0">;
+ def CR1 : Register<"cr1">;
+ def CR2 : Register<"cr2">;
+ def CR3 : Register<"cr3">;
+ def CR4 : Register<"cr4">;
+ def CR5 : Register<"cr5">;
+ def CR6 : Register<"cr6">;
+ def CR7 : Register<"cr7">;
+ def CR8 : Register<"cr8">;
}
//===----------------------------------------------------------------------===//
-// Subregister Set Definitions... now that we have all of the pieces, define the
-// sub registers for each register.
-//
-
-def x86_subreg_8bit : PatLeaf<(i32 1)>;
-def x86_subreg_8bit_hi : PatLeaf<(i32 2)>;
-def x86_subreg_16bit : PatLeaf<(i32 3)>;
-def x86_subreg_32bit : PatLeaf<(i32 4)>;
-
-def x86_subreg_ss : PatLeaf<(i32 1)>;
-def x86_subreg_sd : PatLeaf<(i32 2)>;
-def x86_subreg_xmm : PatLeaf<(i32 3)>;
-
-def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI,
- R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
- [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
- R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
-
-def : SubRegSet<2, [AX, CX, DX, BX],
- [AH, CH, DH, BH]>;
-
-def : SubRegSet<1, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
- R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
- [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
- R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
-
-def : SubRegSet<2, [EAX, ECX, EDX, EBX],
- [AH, CH, DH, BH]>;
-
-def : SubRegSet<3, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
- R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
- [AX, CX, DX, BX, SP, BP, SI, DI,
- R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
-
-def : SubRegSet<1, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
- R8, R9, R10, R11, R12, R13, R14, R15],
- [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
- R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
-
-def : SubRegSet<2, [RAX, RCX, RDX, RBX],
- [AH, CH, DH, BH]>;
-
-def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
- R8, R9, R10, R11, R12, R13, R14, R15],
- [AX, CX, DX, BX, SP, BP, SI, DI,
- R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
-
-def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
- R8, R9, R10, R11, R12, R13, R14, R15],
- [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
- R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>;
-
-def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
- YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-def : SubRegSet<2, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
- YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-def : SubRegSet<3, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
- YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-def : SubRegSet<1, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15],
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-def : SubRegSet<2, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15],
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-//===----------------------------------------------------------------------===//
// Register Class Definitions... now that we have all of the pieces, define the
// top-level register classes. The order specified in the register list is
// implicitly defined to be the register allocation order.
@@ -370,7 +308,7 @@ def GR8 : RegisterClass<"X86", [i8], 8,
def GR16 : RegisterClass<"X86", [i16], 16,
[AX, CX, DX, SI, DI, BX, BP, SP,
R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> {
- let SubRegClassList = [GR8, GR8];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -422,7 +360,7 @@ def GR16 : RegisterClass<"X86", [i16], 16,
def GR32 : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
- let SubRegClassList = [GR8, GR8, GR16];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -477,7 +415,9 @@ def GR32 : RegisterClass<"X86", [i32], 32,
def GR64 : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP, RSP, RIP]> {
- let SubRegClassList = [GR8, GR8, GR16, GR32];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+ (GR16 sub_16bit),
+ (GR32 sub_32bit)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -511,14 +451,8 @@ def DEBUG_REG : RegisterClass<"X86", [i32], 32,
}
// Control registers.
-def CONTROL_REG_32 : RegisterClass<"X86", [i32], 32,
- [ECR0, ECR1, ECR2, ECR3, ECR4, ECR5, ECR6,
- ECR7]> {
-}
-
-def CONTROL_REG_64 : RegisterClass<"X86", [i64], 64,
- [RCR0, RCR1, RCR2, RCR3, RCR4, RCR5, RCR6,
- RCR7, RCR8]> {
+def CONTROL_REG : RegisterClass<"X86", [i64], 64,
+ [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8]> {
}
// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
@@ -532,20 +466,27 @@ def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> {
def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]> {
}
def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> {
- let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H];
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)];
}
def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> {
- let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD];
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+ (GR8_ABCD_H sub_8bit_hi),
+ (GR16_ABCD sub_16bit)];
}
def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
- let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD];
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+ (GR8_ABCD_H sub_8bit_hi),
+ (GR16_ABCD sub_16bit),
+ (GR32_ABCD sub_32bit)];
}
def GR32_TC : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX]> {
- let SubRegClassList = [GR8, GR8, GR16];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
}
def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
R8, R9, R11]> {
- let SubRegClassList = [GR8, GR8, GR16, GR32_TC];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+ (GR16 sub_16bit),
+ (GR32_TC sub_32bit)];
}
// GR8_NOREX - GR8 registers which do not require a REX prefix.
@@ -585,7 +526,7 @@ def GR8_NOREX : RegisterClass<"X86", [i8], 8,
// GR16_NOREX - GR16 registers which do not require a REX prefix.
def GR16_NOREX : RegisterClass<"X86", [i16], 16,
[AX, CX, DX, SI, DI, BX, BP, SP]> {
- let SubRegClassList = [GR8_NOREX, GR8_NOREX];
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -608,7 +549,8 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16,
// GR32_NOREX - GR32 registers which do not require a REX prefix.
def GR32_NOREX : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
- let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX];
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -631,7 +573,9 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32,
// GR64_NOREX - GR64 registers which do not require a REX prefix.
def GR64_NOREX : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP]> {
- let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit),
+ (GR32_NOREX sub_32bit)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -656,7 +600,7 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64,
def GR32_NOSP : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP,
R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
- let SubRegClassList = [GR8, GR8, GR16];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -709,7 +653,9 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32,
def GR64_NOSP : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP]> {
- let SubRegClassList = [GR8, GR8, GR16, GR32_NOSP];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+ (GR16 sub_16bit),
+ (GR32_NOSP sub_32bit)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -734,7 +680,9 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64,
// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, RBX, RBP]> {
- let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit),
+ (GR32_NOREX sub_32bit)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -758,7 +706,9 @@ def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
// A class to support the 'A' assembler constraint: EAX then EDX.
def GR32_AD : RegisterClass<"X86", [i32], 32, [EAX, EDX]> {
- let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD];
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+ (GR8_ABCD_H sub_8bit_hi),
+ (GR16_ABCD sub_16bit)];
}
// Scalar SSE2 floating point registers.
@@ -836,7 +786,8 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11,
XMM12, XMM13, XMM14, XMM15]> {
- let SubRegClassList = [FR32, FR64];
+ let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)];
+
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -856,7 +807,7 @@ def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256,
[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11,
YMM12, YMM13, YMM14, YMM15]> {
- let SubRegClassList = [FR32, FR64, VR128];
+ let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)];
}
// Status flags registers.
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index cd87b82..6297a27 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -12,11 +12,232 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "x86-selectiondag-info"
-#include "X86SelectionDAGInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
-X86SelectionDAGInfo::X86SelectionDAGInfo() {
+X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) :
+ TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<X86Subtarget>()),
+ TLI(*TM.getTargetLowering()) {
}
X86SelectionDAGInfo::~X86SelectionDAGInfo() {
}
+
+SDValue
+X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile,
+ const Value *DstSV,
+ uint64_t DstSVOff) const {
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+
+ // If not DWORD aligned or size is more than the threshold, call the library.
+ // The libc version is likely to be faster for these cases. It can use the
+ // address value and run time information about the CPU.
+ if ((Align & 3) != 0 ||
+ !ConstantSize ||
+ ConstantSize->getZExtValue() >
+ Subtarget->getMaxInlineSizeThreshold()) {
+ SDValue InFlag(0, 0);
+
+ // Check to see if there is a specialized entry-point for memory zeroing.
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
+
+ if (const char *bzeroEntry = V &&
+ V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
+ EVT IntPtr = TLI.getPointerTy();
+ const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Args.push_back(Entry);
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false,
+ 0, CallingConv::C, false, /*isReturnValueUsed=*/false,
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), Args,
+ DAG, dl);
+ return CallResult.second;
+ }
+
+ // Otherwise have the target-independent code call memset.
+ return SDValue();
+ }
+
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ SDValue InFlag(0, 0);
+ EVT AVT;
+ SDValue Count;
+ ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
+ unsigned BytesLeft = 0;
+ bool TwoRepStos = false;
+ if (ValC) {
+ unsigned ValReg;
+ uint64_t Val = ValC->getZExtValue() & 255;
+
+ // If the value is a constant, then we can potentially use larger sets.
+ switch (Align & 3) {
+ case 2: // WORD aligned
+ AVT = MVT::i16;
+ ValReg = X86::AX;
+ Val = (Val << 8) | Val;
+ break;
+ case 0: // DWORD aligned
+ AVT = MVT::i32;
+ ValReg = X86::EAX;
+ Val = (Val << 8) | Val;
+ Val = (Val << 16) | Val;
+ if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
+ AVT = MVT::i64;
+ ValReg = X86::RAX;
+ Val = (Val << 32) | Val;
+ }
+ break;
+ default: // Byte aligned
+ AVT = MVT::i8;
+ ValReg = X86::AL;
+ Count = DAG.getIntPtrConstant(SizeVal);
+ break;
+ }
+
+ if (AVT.bitsGT(MVT::i8)) {
+ unsigned UBytes = AVT.getSizeInBits() / 8;
+ Count = DAG.getIntPtrConstant(SizeVal / UBytes);
+ BytesLeft = SizeVal % UBytes;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ } else {
+ AVT = MVT::i8;
+ Count = DAG.getIntPtrConstant(SizeVal);
+ Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ X86::EDI,
+ Dst, InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+
+ if (TwoRepStos) {
+ InFlag = Chain.getValue(1);
+ Count = Size;
+ EVT CVT = Count.getValueType();
+ SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
+ DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
+ Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
+ X86::ECX,
+ Left, InFlag);
+ InFlag = Chain.getValue(1);
+ Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+ } else if (BytesLeft) {
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ EVT AddrVT = Dst.getValueType();
+ EVT SizeVT = Size.getValueType();
+
+ Chain = DAG.getMemset(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
+ DAG.getConstant(Offset, AddrVT)),
+ Src,
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, isVolatile, DstSV, DstSVOff + Offset);
+ }
+
+ // TODO: Use a TokenFactor, as in memcpy, instead of a single chain.
+ return Chain;
+}
+
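
For a constant fill value the lowering widens the stored unit as far as alignment allows: the byte is replicated into AL/AX/EAX/RAX, the size splits into a rep-store count plus a sub-unit remainder, and the remainder is finished with a small follow-up memset. A hedged standalone sketch of that arithmetic (plain C++, not LLVM code):

    #include <cassert>
    #include <cstdint>

    struct RepPlan { uint64_t Val; uint64_t Count; unsigned BytesLeft; };

    // Mirrors the switch on (Align & 3) above: replicate the byte to the
    // widest unit the alignment permits, then divide the total size.
    static RepPlan planRepStos(uint64_t SizeVal, uint8_t Byte, unsigned Align,
                               bool is64Bit) {
      uint64_t Val = Byte;
      unsigned Width = 1;                   // default: byte-wide rep stosb
      if ((Align & 3) == 0) {               // DWORD aligned
        Val = (Val << 8) | Val;
        Val = (Val << 16) | Val;
        Width = 4;
        if (is64Bit && (Align & 7) == 0) {  // QWORD aligned
          Val = (Val << 32) | Val;
          Width = 8;
        }
      } else if ((Align & 3) == 2) {        // WORD aligned
        Val = (Val << 8) | Val;
        Width = 2;
      }
      return {Val, SizeVal / Width, unsigned(SizeVal % Width)};
    }

    int main() {
      RepPlan P = planRepStos(35, 0xAB, /*Align=*/8, /*is64Bit=*/true);
      assert(P.Count == 4 && P.BytesLeft == 3); // four qword stores + 3 bytes
      assert(P.Val == 0xABABABABABABABABULL);
      return 0;
    }
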
+SDValue
+X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ const Value *DstSV,
+ uint64_t DstSVOff,
+ const Value *SrcSV,
+ uint64_t SrcSVOff) const {
+  // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+ return SDValue();
+
+  // If not DWORD aligned, call the library.
+ if ((Align & 3) != 0)
+ return SDValue();
+
+ // DWORD aligned
+ EVT AVT = MVT::i32;
+ if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned
+ AVT = MVT::i64;
+
+ unsigned UBytes = AVT.getSizeInBits() / 8;
+ unsigned CountVal = SizeVal / UBytes;
+ SDValue Count = DAG.getIntPtrConstant(CountVal);
+ unsigned BytesLeft = SizeVal % UBytes;
+
+ SDValue InFlag(0, 0);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ X86::EDI,
+ Dst, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
+ X86::ESI,
+ Src, InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+ SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
+ array_lengthof(Ops));
+
+ SmallVector<SDValue, 4> Results;
+ Results.push_back(RepMovs);
+ if (BytesLeft) {
+    // Handle the last 1-7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ EVT DstVT = Dst.getValueType();
+ EVT SrcVT = Src.getValueType();
+ EVT SizeVT = Size.getValueType();
+ Results.push_back(DAG.getMemcpy(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, DstVT, Dst,
+ DAG.getConstant(Offset, DstVT)),
+ DAG.getNode(ISD::ADD, dl, SrcVT, Src,
+ DAG.getConstant(Offset, SrcVT)),
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, isVolatile, AlwaysInline,
+ DstSV, DstSVOff + Offset,
+ SrcSV, SrcSVOff + Offset));
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Results[0], Results.size());
+}
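
A note on the value-widening in EmitTargetCodeForMemset above: when the fill value is a constant, the lowering replicates the low byte across the widest store unit the destination alignment permits (i16, i32, or i64 via AX/EAX/RAX) using shift-or steps, so a single rep stos writes several bytes per iteration. A minimal standalone sketch of that replication arithmetic, isolated from the SelectionDAG plumbing; the function name splatByte and the test values are ours, not part of the commit:

#include <cassert>
#include <cstdint>

// Replicate the low byte of Val across Bytes bytes, mirroring the
// shift-or widening done on the WORD/DWORD/QWORD-aligned rep stos paths.
static uint64_t splatByte(uint64_t Val, unsigned Bytes) {
  Val &= 0xFF;                              // start from the single fill byte
  if (Bytes >= 2) Val = (Val << 8)  | Val;  // 0xAB -> 0xABAB
  if (Bytes >= 4) Val = (Val << 16) | Val;  // 0xABAB -> 0xABABABAB
  if (Bytes >= 8) Val = (Val << 32) | Val;  // -> 0xABABABABABABABAB
  return Val;
}

int main() {
  assert(splatByte(0xAB, 2) == 0xABABULL);
  assert(splatByte(0xAB, 4) == 0xABABABABULL);
  assert(splatByte(0xAB, 8) == 0xABABABABABABABABULL);
  return 0;
}

This is the same computation as the Val = (Val << 8) | Val chain in the switch on (Align & 3) above.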
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
index 9834754..4f30f31 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -18,10 +18,40 @@
namespace llvm {
+class X86TargetLowering;
+class X86TargetMachine;
+class X86Subtarget;
+
class X86SelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ const X86TargetLowering &TLI;
+
public:
- X86SelectionDAGInfo();
+ explicit X86SelectionDAGInfo(const X86TargetMachine &TM);
~X86SelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile,
+ const Value *DstSV,
+ uint64_t DstSVOff) const;
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ const Value *DstSV,
+ uint64_t DstSVOff,
+ const Value *SrcSV,
+ uint64_t SrcSVOff) const;
};
}
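
Both inline paths declared in this header split a constant size into a count of full store units for the rep-prefixed instruction plus a 0-7 byte tail emitted as a separate, smaller memset/memcpy. The split itself is plain division and remainder by the unit width; a sketch under that reading (the struct and function names are ours):

#include <cassert>
#include <cstdint>

struct RepSplit {
  uint64_t Count;      // iterations for rep movs/stos at the unit width
  unsigned BytesLeft;  // 0..UnitBytes-1 tail bytes handled separately
};

// Mirrors "Count = SizeVal / UBytes; BytesLeft = SizeVal % UBytes"
// from the memset/memcpy lowering above.
static RepSplit splitRepCount(uint64_t SizeVal, unsigned UnitBytes) {
  return RepSplit{SizeVal / UnitBytes,
                  static_cast<unsigned>(SizeVal % UnitBytes)};
}

int main() {
  RepSplit S = splitRepCount(37, 8);          // QWORD units on x86-64
  assert(S.Count == 4 && S.BytesLeft == 5);   // 4*8 + 5 == 37
  return 0;
}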
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index f39904e..f2c5058 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -17,17 +17,13 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Support/CommandLine.h"
-
using namespace llvm;
-static cl::opt<bool> DisableSSEDomain("disable-sse-domain",
- cl::init(false), cl::Hidden,
- cl::desc("Disable SSE Domain Fixing"));
-
static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
@@ -43,6 +39,18 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
}
}
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+ MCContext &Ctx, TargetAsmBackend &TAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll) {
+ Triple TheTriple(TT);
+ switch (TheTriple.getOS()) {
+ default:
+ return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+ }
+}
+
extern "C" void LLVMInitializeX86Target() {
// Register the target.
RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
@@ -63,6 +71,12 @@ extern "C" void LLVMInitializeX86Target() {
createX86_32AsmBackend);
TargetRegistry::RegisterAsmBackend(TheX86_64Target,
createX86_64AsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterObjectStreamer(TheX86_32Target,
+ createMCStreamer);
+ TargetRegistry::RegisterObjectStreamer(TheX86_64Target,
+ createMCStreamer);
}
@@ -88,7 +102,8 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
Subtarget.getStackAlignment(),
(Subtarget.isTargetWin64() ? -40 :
(Subtarget.is64Bit() ? -8 : -4))),
- InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) {
+ InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this),
+ ELFWriterInfo(*this) {
DefRelocModel = getRelocationModel();
// If no relocation model was picked, default as appropriate for the target.
@@ -178,8 +193,7 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2() &&
- !DisableSSEDomain) {
+ if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
PM.add(createSSEDomainFixPass());
return true;
}
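
The createMCStreamer hook added above dispatches on the triple's operating system; at this revision only the Mach-O object streamer exists, so the switch has a lone default case. An illustrative sketch of the same dispatch shape, with hypothetical ELF and COFF branches that are not in this commit:

#include <iostream>
#include <string>

enum class OSType { Darwin, Linux, Win32 };

// Hypothetical per-OS factory dispatch, shaped like createMCStreamer
// above; once ELF/COFF object writers exist, the extra cases slot in.
static std::string pickObjectStreamer(OSType OS) {
  switch (OS) {
  case OSType::Linux: return "ELF streamer";   // hypothetical branch
  case OSType::Win32: return "COFF streamer";  // hypothetical branch
  default:            return "Mach-O streamer";
  }
}

int main() {
  std::cout << pickObjectStreamer(OSType::Darwin) << '\n';
  return 0;
}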
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index dc4234c..f9fb424 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -23,6 +23,7 @@
#include "X86JITInfo.h"
#include "X86Subtarget.h"
#include "X86ISelLowering.h"
+#include "X86SelectionDAGInfo.h"
namespace llvm {
@@ -35,6 +36,7 @@ class X86TargetMachine : public LLVMTargetMachine {
X86InstrInfo InstrInfo;
X86JITInfo JITInfo;
X86TargetLowering TLInfo;
+ X86SelectionDAGInfo TSInfo;
X86ELFWriterInfo ELFWriterInfo;
Reloc::Model DefRelocModel; // Reloc model before it's overridden.
@@ -54,6 +56,9 @@ public:
virtual const X86TargetLowering *getTargetLowering() const {
return &TLInfo;
}
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
virtual const X86RegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
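
The getSelectionDAGInfo accessor added above relies on covariant return types: the base TargetMachine getter returns the generic info class, while X86TargetMachine's override returns the X86-specific subclass, so callers that hold the concrete machine need no cast. A minimal sketch of the idiom with stand-in types (none of these names are LLVM's):

#include <iostream>

struct SelDAGInfoBase { virtual ~SelDAGInfoBase() {} };

struct X86SelDAGInfo : SelDAGInfoBase {
  void x86Hook() const { std::cout << "x86-specific hook\n"; }
};

struct TargetMachineBase {
  virtual ~TargetMachineBase() {}
  virtual const SelDAGInfoBase *getSelectionDAGInfo() const { return 0; }
};

struct X86TM : TargetMachineBase {
  X86SelDAGInfo TSInfo;
  // Covariant override: returns the derived type, yet still overrides the
  // base virtual because X86SelDAGInfo derives from SelDAGInfoBase.
  virtual const X86SelDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
};

int main() {
  X86TM TM;
  TM.getSelectionDAGInfo()->x86Hook();
  return 0;
}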