author    | rdivacky <rdivacky@FreeBSD.org> | 2010-05-27 15:15:58 +0000
committer | rdivacky <rdivacky@FreeBSD.org> | 2010-05-27 15:15:58 +0000
commit    | 1e3dec662ea18131c495db50caccc57f77b7a5fe (patch)
tree      | 9fad9a5d5dd8c4ff54af48edad9c8cc26dd5fda1 /lib/Target/X86
parent    | 377552607e51dc1d3e6ff33833f9620bcfe815ac (diff)
download  | FreeBSD-src-1e3dec662ea18131c495db50caccc57f77b7a5fe.zip, FreeBSD-src-1e3dec662ea18131c495db50caccc57f77b7a5fe.tar.gz
Update LLVM to r104832.
Diffstat (limited to 'lib/Target/X86')
32 files changed, 1532 insertions, 851 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 6b403c1..40a6a7b 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -186,20 +186,73 @@ struct X86Operand : public MCParsedAsmOperand { bool isImm() const { return Kind == Immediate; } - bool isImmSExt8() const { - // Accept immediates which fit in 8 bits when sign extended, and - // non-absolute immediates. + bool isImmSExti16i8() const { if (!isImm()) return false; - if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) { - int64_t Value = CE->getValue(); - return Value == (int64_t) (int8_t) Value; - } + // If this isn't a constant expr, just assume it fits and let relaxation + // handle it. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) + return true; - return true; + // Otherwise, check the value is in a range that makes sense for this + // extension. + uint64_t Value = CE->getValue(); + return (( Value <= 0x000000000000007FULL)|| + (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)|| + (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); } - + bool isImmSExti32i8() const { + if (!isImm()) + return false; + + // If this isn't a constant expr, just assume it fits and let relaxation + // handle it. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) + return true; + + // Otherwise, check the value is in a range that makes sense for this + // extension. + uint64_t Value = CE->getValue(); + return (( Value <= 0x000000000000007FULL)|| + (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)|| + (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + } + bool isImmSExti64i8() const { + if (!isImm()) + return false; + + // If this isn't a constant expr, just assume it fits and let relaxation + // handle it. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) + return true; + + // Otherwise, check the value is in a range that makes sense for this + // extension. + uint64_t Value = CE->getValue(); + return (( Value <= 0x000000000000007FULL)|| + (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + } + bool isImmSExti64i32() const { + if (!isImm()) + return false; + + // If this isn't a constant expr, just assume it fits and let relaxation + // handle it. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) + return true; + + // Otherwise, check the value is in a range that makes sense for this + // extension. + uint64_t Value = CE->getValue(); + return (( Value <= 0x000000007FFFFFFFULL)|| + (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + } + bool isMem() const { return Kind == Memory; } bool isAbsMem() const { @@ -231,12 +284,6 @@ struct X86Operand : public MCParsedAsmOperand { addExpr(Inst, getImm()); } - void addImmSExt8Operands(MCInst &Inst, unsigned N) const { - // FIXME: Support user customization of the render method. 
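The four new isImmSExti*i* predicates above all share one shape, and the accepted ranges are easiest to see written out. A standalone sketch, not part of the patch, restating the i32i8 case: an immediate fits when it is a small non-negative value, the 32-bit two's-complement encoding of -128..-1, or the full 64-bit sign extension of such a value.

```cpp
#include <cstdint>

// Hypothetical restatement of isImmSExti32i8()'s range test: true iff the
// 64-bit immediate encodes a value representable as an imm8 sign-extended
// to 32 bits (under either a 32-bit or a 64-bit reading of the operand).
static bool fitsSExti32i8(uint64_t Value) {
  return (Value <= 0x000000000000007FULL) ||        // 0..127
         (0x00000000FFFFFF80ULL <= Value &&
          Value <= 0x00000000FFFFFFFFULL) ||        // 32-bit view of -128..-1
         (0xFFFFFFFFFFFFFF80ULL <= Value);          // 64-bit view of -128..-1
}
```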
- assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - void addMemOperands(MCInst &Inst, unsigned N) const { assert((N == 5) && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); @@ -535,6 +582,21 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { bool X86ATTAsmParser:: ParseInstruction(const StringRef &Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // The various flavors of pushf and popf use Requires<In32BitMode> and + // Requires<In64BitMode>, but the assembler doesn't yet implement that. + // For now, just do a manual check to prevent silent misencoding. + if (Is64Bit) { + if (Name == "popfl") + return Error(NameLoc, "popfl cannot be encoded in 64-bit mode"); + else if (Name == "pushfl") + return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode"); + } else { + if (Name == "popfq") + return Error(NameLoc, "popfq cannot be encoded in 32-bit mode"); + else if (Name == "pushfq") + return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode"); + } + // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to // represent alternative syntaxes in the .td file, without requiring // instruction duplication. @@ -547,9 +609,66 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, .Case("repe", "rep") .Case("repz", "rep") .Case("repnz", "repne") + .Case("pushf", Is64Bit ? "pushfq" : "pushfl") + .Case("popf", Is64Bit ? "popfq" : "popfl") + .Case("retl", Is64Bit ? "retl" : "ret") + .Case("retq", Is64Bit ? "ret" : "retq") + .Case("setz", "sete") + .Case("setnz", "setne") + .Case("jz", "je") + .Case("jnz", "jne") + .Case("cmovcl", "cmovbl") + .Case("cmovcl", "cmovbl") + .Case("cmovnal", "cmovbel") + .Case("cmovnbl", "cmovael") + .Case("cmovnbel", "cmoval") + .Case("cmovncl", "cmovael") + .Case("cmovngl", "cmovlel") + .Case("cmovnl", "cmovgel") + .Case("cmovngl", "cmovlel") + .Case("cmovngel", "cmovll") + .Case("cmovnll", "cmovgel") + .Case("cmovnlel", "cmovgl") + .Case("cmovnzl", "cmovnel") + .Case("cmovzl", "cmovel") .Default(Name); + + // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. + const MCExpr *ExtraImmOp = 0; + if (PatchedName.startswith("cmp") && + (PatchedName.endswith("ss") || PatchedName.endswith("sd") || + PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { + unsigned SSEComparisonCode = StringSwitch<unsigned>( + PatchedName.slice(3, PatchedName.size() - 2)) + .Case("eq", 0) + .Case("lt", 1) + .Case("le", 2) + .Case("unord", 3) + .Case("neq", 4) + .Case("nlt", 5) + .Case("nle", 6) + .Case("ord", 7) + .Default(~0U); + if (SSEComparisonCode != ~0U) { + ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, + getParser().getContext()); + if (PatchedName.endswith("ss")) { + PatchedName = "cmpss"; + } else if (PatchedName.endswith("sd")) { + PatchedName = "cmpsd"; + } else if (PatchedName.endswith("ps")) { + PatchedName = "cmpps"; + } else { + assert(PatchedName.endswith("pd") && "Unexpected mnemonic!"); + PatchedName = "cmppd"; + } + } + } Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); + if (ExtraImmOp) + Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); + if (getLexer().isNot(AsmToken::EndOfStatement)) { // Parse '*' modifier. @@ -564,7 +683,7 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, Operands.push_back(Op); else return true; - + while (getLexer().is(AsmToken::Comma)) { Parser.Lex(); // Eat the comma. 
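For readers unfamiliar with the idiom, the .Case chains above use llvm::StringSwitch, which compares the input against each literal in turn and falls back to .Default. A minimal sketch of how the alias table behaves, with a reduced case list:

```cpp
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;

// Map alias mnemonics onto their canonical spellings; names not in the
// table pass through unchanged.
static StringRef canonicalizeMnemonic(StringRef Name, bool Is64Bit) {
  return StringSwitch<StringRef>(Name)
      .Case("setz",  "sete")
      .Case("setnz", "setne")
      .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
      .Case("popf",  Is64Bit ? "popfq"  : "popfl")
      .Default(Name);
}
```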
@@ -587,6 +706,17 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, Operands.erase(Operands.begin() + 1); } + // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as + // "f{mul*,add*,sub*,div*} $op" + if ((Name.startswith("fmul") || Name.startswith("fadd") || + Name.startswith("fsub") || Name.startswith("fdiv")) && + Operands.size() == 3 && + static_cast<X86Operand*>(Operands[2])->isReg() && + static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) { + delete Operands[2]; + Operands.erase(Operands.begin() + 2); + } + return false; } @@ -622,6 +752,31 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { return false; } +/// LowerMOffset - Lower an 'moffset' form of an instruction, which just has a +/// imm operand, to having "rm" or "mr" operands with the offset in the disp +/// field. +static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo, + bool isMR) { + MCOperand Disp = Inst.getOperand(0); + + // Start over with an empty instruction. + Inst = MCInst(); + Inst.setOpcode(Opc); + + if (!isMR) + Inst.addOperand(MCOperand::CreateReg(RegNo)); + + // Add the mem operand. + Inst.addOperand(MCOperand::CreateReg(0)); // Segment + Inst.addOperand(MCOperand::CreateImm(1)); // Scale + Inst.addOperand(MCOperand::CreateReg(0)); // IndexReg + Inst.addOperand(Disp); // Displacement + Inst.addOperand(MCOperand::CreateReg(0)); // BaseReg + + if (isMR) + Inst.addOperand(MCOperand::CreateReg(RegNo)); +} + // FIXME: Custom X86 cleanup function to implement a temporary hack to handle // matching INCL/DECL correctly for x86_64. This needs to be replaced by a // proper mechanism for supporting (ambiguous) feature dependent instructions. @@ -637,6 +792,14 @@ void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) { case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break; case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break; case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break; + + // moffset instructions are x86-32 only. + case X86::MOV8o8a: LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break; + case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break; + case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break; + case X86::MOV8ao8: LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break; + case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break; + case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break; } } @@ -673,6 +836,8 @@ X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> bool MatchW = MatchInstructionImpl(Operands, Inst); Tmp[Base.size()] = 'l'; bool MatchL = MatchInstructionImpl(Operands, Inst); + Tmp[Base.size()] = 'q'; + bool MatchQ = MatchInstructionImpl(Operands, Inst); // Restore the old token. Op->setTokenValue(Base); @@ -680,7 +845,7 @@ X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> // If exactly one matched, then we treat that as a successful match (and the // instruction will already have been filled in correctly, since the failing // matches won't have modified it). - if (MatchB + MatchW + MatchL == 2) + if (MatchB + MatchW + MatchL + MatchQ == 3) return false; // Otherwise, the match failed. 
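One subtlety in the suffix-matching change at the end of this file: MatchInstructionImpl() returns true on *failure*, so after trying the four suffixes b/w/l/q, exactly one success shows up as a sum of three. A hedged distillation of that test:

```cpp
// Sketch only: each Match* flag is the failure result of one suffix attempt.
static bool exactlyOneSucceeded(bool MatchB, bool MatchW,
                                bool MatchL, bool MatchQ) {
  // Four attempts, each flag true on failure: one success <=> three failures.
  return MatchB + MatchW + MatchL + MatchQ == 3;
}
```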
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index 8b0ed1c..183213d 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -58,12 +58,11 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { SetupMachineFunction(MF); if (Subtarget->isTargetCOFF()) { - const Function *F = MF.getFunction(); - OutStreamer.EmitRawText("\t.def\t " + Twine(CurrentFnSym->getName()) + - ";\t.scl\t" + - Twine(F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT) + - ";\t.type\t" + Twine(COFF::DT_FCN << COFF::N_BTSHFT) - + ";\t.endef"); + bool Intrn = MF.getFunction()->hasInternalLinkage(); + OutStreamer.BeginCOFFSymbolDef(CurrentFnSym); + OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::C_STAT : COFF::C_EXT); + OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT); + OutStreamer.EndCOFFSymbolDef(); } // Have common code print out the function header with linkage info etc. @@ -571,44 +570,55 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); // Emit type information for external functions - for (X86COFFMachineModuleInfo::stub_iterator I = COFFMMI.stub_begin(), - E = COFFMMI.stub_end(); I != E; ++I) { - OutStreamer.EmitRawText("\t.def\t " + Twine(I->getKeyData()) + - ";\t.scl\t" + Twine(COFF::C_EXT) + - ";\t.type\t" + - Twine(COFF::DT_FCN << COFF::N_BTSHFT) + - ";\t.endef"); + typedef X86COFFMachineModuleInfo::externals_iterator externals_iterator; + for (externals_iterator I = COFFMMI.externals_begin(), + E = COFFMMI.externals_end(); + I != E; ++I) { + OutStreamer.BeginCOFFSymbolDef(CurrentFnSym); + OutStreamer.EmitCOFFSymbolStorageClass(COFF::C_EXT); + OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT); + OutStreamer.EndCOFFSymbolDef(); } - if (Subtarget->isTargetCygMing()) { - // Necessary for dllexport support - std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals; - - const TargetLoweringObjectFileCOFF &TLOFCOFF = - static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering()); - - for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) - if (I->hasDLLExportLinkage()) - DLLExportedFns.push_back(Mang->getSymbol(I)); - - for (Module::const_global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) - if (I->hasDLLExportLinkage()) - DLLExportedGlobals.push_back(Mang->getSymbol(I)); - - // Output linker support code for dllexported globals on windows. 
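The first hunk of this file replaces hand-formatted "\t.def\t...;\t.scl\t...;\t.type\t...;\t.endef" raw text with the structured MCStreamer COFF API. The call sequence in isolation (a sketch; the COFF.h header path is an assumption for this era of the tree):

```cpp
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/COFF.h"  // assumption: C_EXT, DT_FCN, N_BTSHFT here

// Emit ".def Sym; .scl 2; .type 32; .endef" through the streamer.
static void emitFunctionSymbolDef(llvm::MCStreamer &OS, llvm::MCSymbol *Sym) {
  OS.BeginCOFFSymbolDef(Sym);
  OS.EmitCOFFSymbolStorageClass(llvm::COFF::C_EXT);                  // external
  OS.EmitCOFFSymbolType(llvm::COFF::DT_FCN << llvm::COFF::N_BTSHFT); // function
  OS.EndCOFFSymbolDef();
}
```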
- if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) { - OutStreamer.SwitchSection(TLOFCOFF.getCOFFSection(".section .drectve", - true, - SectionKind::getMetadata())); - for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) - OutStreamer.EmitRawText("\t.ascii \" -export:" + - Twine(DLLExportedGlobals[i]->getName()) + - ",data\""); - - for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) - OutStreamer.EmitRawText("\t.ascii \" -export:" + - Twine(DLLExportedFns[i]->getName()) + "\""); + // Necessary for dllexport support + std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals; + + const TargetLoweringObjectFileCOFF &TLOFCOFF = + static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering()); + + for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) + if (I->hasDLLExportLinkage()) + DLLExportedFns.push_back(Mang->getSymbol(I)); + + for (Module::const_global_iterator I = M.global_begin(), + E = M.global_end(); I != E; ++I) + if (I->hasDLLExportLinkage()) + DLLExportedGlobals.push_back(Mang->getSymbol(I)); + + // Output linker support code for dllexported globals on windows. + if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) { + OutStreamer.SwitchSection(TLOFCOFF.getDrectveSection()); + SmallString<128> name; + for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) { + if (Subtarget->isTargetWindows()) + name = " /EXPORT:"; + else + name = " -export:"; + name += DLLExportedGlobals[i]->getName(); + if (Subtarget->isTargetWindows()) + name += ",DATA"; + else + name += ",data"; + OutStreamer.EmitBytes(name, 0); + } + + for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) { + if (Subtarget->isTargetWindows()) + name = " /EXPORT:"; + else + name = " -export:"; + name += DLLExportedFns[i]->getName(); + OutStreamer.EmitBytes(name, 0); } } } diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h index 95984b2..b5a7f8d 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h @@ -31,7 +31,7 @@ class MCInst; class MCStreamer; class MCSymbol; -class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { +class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { const X86Subtarget *Subtarget; public: explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index effc8ed..4edeca9 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -224,6 +224,60 @@ static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) { OutMI.addOperand(OutMI.getOperand(0)); } +/// \brief Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with +/// a short fixed-register form. +static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) { + unsigned ImmOp = Inst.getNumOperands() - 1; + assert(Inst.getOperand(0).isReg() && Inst.getOperand(ImmOp).isImm() && + ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() && + Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) || + Inst.getNumOperands() == 2) && "Unexpected instruction!"); + + // Check whether the destination register can be fixed. + unsigned Reg = Inst.getOperand(0).getReg(); + if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX) + return; + + // If so, rewrite the instruction. 
+ MCOperand Saved = Inst.getOperand(ImmOp); + Inst = MCInst(); + Inst.setOpcode(Opcode); + Inst.addOperand(Saved); +} + +/// \brief Simplify things like MOV32rm to MOV32o32a. +static void SimplifyShortMoveForm(MCInst &Inst, unsigned Opcode) { + bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg(); + unsigned AddrBase = IsStore; + unsigned RegOp = IsStore ? 0 : 5; + unsigned AddrOp = AddrBase + 3; + assert(Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() && + Inst.getOperand(AddrBase + 0).isReg() && // base + Inst.getOperand(AddrBase + 1).isImm() && // scale + Inst.getOperand(AddrBase + 2).isReg() && // index register + (Inst.getOperand(AddrOp).isExpr() || // address + Inst.getOperand(AddrOp).isImm())&& + Inst.getOperand(AddrBase + 4).isReg() && // segment + "Unexpected instruction!"); + + // Check whether the destination register can be fixed. + unsigned Reg = Inst.getOperand(RegOp).getReg(); + if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX) + return; + + // Check whether this is an absolute address. + if (Inst.getOperand(AddrBase + 0).getReg() != 0 || + Inst.getOperand(AddrBase + 2).getReg() != 0 || + Inst.getOperand(AddrBase + 4).getReg() != 0 || + Inst.getOperand(AddrBase + 1).getImm() != 1) + return; + + // If so, rewrite the instruction. + MCOperand Saved = Inst.getOperand(AddrOp); + Inst = MCInst(); + Inst.setOpcode(Opcode); + Inst.addOperand(Saved); +} void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); @@ -309,8 +363,32 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV64r0 -> MOV32r0 LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; - - + + // TAILJMPr, TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have + // register inputs modeled as normal uses instead of implicit uses. As such, + // truncate off all but the first operand (the callee). FIXME: Change isel. + case X86::TAILJMPr: + case X86::TAILJMPr64: + case X86::CALL64r: + case X86::CALL64pcrel32: { + unsigned Opcode = OutMI.getOpcode(); + MCOperand Saved = OutMI.getOperand(0); + OutMI = MCInst(); + OutMI.setOpcode(Opcode); + OutMI.addOperand(Saved); + break; + } + + // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions. + case X86::TAILJMPd: + case X86::TAILJMPd64: { + MCOperand Saved = OutMI.getOperand(0); + OutMI = MCInst(); + OutMI.setOpcode(X86::TAILJMP_1); + OutMI.addOperand(Saved); + break; + } + // The assembler backend wants to see branches in their small form and relax // them to their large form. The JIT can only handle the large form because // it does not do relaxation. For now, translate the large form to the @@ -332,6 +410,61 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { case X86::JGE_4: OutMI.setOpcode(X86::JGE_1); break; case X86::JLE_4: OutMI.setOpcode(X86::JLE_1); break; case X86::JG_4: OutMI.setOpcode(X86::JG_1); break; + + // We don't currently select the correct instruction form for instructions + // which have a short %eax, etc. form. Handle this by custom lowering, for + // now. 
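SimplifyShortImmForm, SimplifyShortMoveForm, and the TAILJMP/CALL cases that follow all rewrite an MCInst the same way: save the one surviving operand, reset the instruction, and re-add it under the new opcode. A generic sketch (rewriteToShortForm is a name invented here, not in the patch):

```cpp
#include "llvm/MC/MCInst.h"
using namespace llvm;

// Keep operand 'KeepIdx', swap the opcode, drop everything else. The short
// forms encode the register implicitly, so only the immediate or
// displacement operand survives the rewrite.
static void rewriteToShortForm(MCInst &Inst, unsigned NewOpcode,
                               unsigned KeepIdx) {
  MCOperand Saved = Inst.getOperand(KeepIdx);
  Inst = MCInst();
  Inst.setOpcode(NewOpcode);
  Inst.addOperand(Saved);
}
```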
+ // + // Note, we are currently not handling the following instructions: + // MOV64ao8, MOV64o8a + // XCHG16ar, XCHG32ar, XCHG64ar + case X86::MOV8mr_NOREX: + case X86::MOV8mr: SimplifyShortMoveForm(OutMI, X86::MOV8ao8); break; + case X86::MOV8rm_NOREX: + case X86::MOV8rm: SimplifyShortMoveForm(OutMI, X86::MOV8o8a); break; + case X86::MOV16mr: SimplifyShortMoveForm(OutMI, X86::MOV16ao16); break; + case X86::MOV16rm: SimplifyShortMoveForm(OutMI, X86::MOV16o16a); break; + case X86::MOV32mr: SimplifyShortMoveForm(OutMI, X86::MOV32ao32); break; + case X86::MOV32rm: SimplifyShortMoveForm(OutMI, X86::MOV32o32a); break; + case X86::MOV64mr: SimplifyShortMoveForm(OutMI, X86::MOV64ao64); break; + case X86::MOV64rm: SimplifyShortMoveForm(OutMI, X86::MOV64o64a); break; + + case X86::ADC8ri: SimplifyShortImmForm(OutMI, X86::ADC8i8); break; + case X86::ADC16ri: SimplifyShortImmForm(OutMI, X86::ADC16i16); break; + case X86::ADC32ri: SimplifyShortImmForm(OutMI, X86::ADC32i32); break; + case X86::ADC64ri32: SimplifyShortImmForm(OutMI, X86::ADC64i32); break; + case X86::ADD8ri: SimplifyShortImmForm(OutMI, X86::ADD8i8); break; + case X86::ADD16ri: SimplifyShortImmForm(OutMI, X86::ADD16i16); break; + case X86::ADD32ri: SimplifyShortImmForm(OutMI, X86::ADD32i32); break; + case X86::ADD64ri32: SimplifyShortImmForm(OutMI, X86::ADD64i32); break; + case X86::AND8ri: SimplifyShortImmForm(OutMI, X86::AND8i8); break; + case X86::AND16ri: SimplifyShortImmForm(OutMI, X86::AND16i16); break; + case X86::AND32ri: SimplifyShortImmForm(OutMI, X86::AND32i32); break; + case X86::AND64ri32: SimplifyShortImmForm(OutMI, X86::AND64i32); break; + case X86::CMP8ri: SimplifyShortImmForm(OutMI, X86::CMP8i8); break; + case X86::CMP16ri: SimplifyShortImmForm(OutMI, X86::CMP16i16); break; + case X86::CMP32ri: SimplifyShortImmForm(OutMI, X86::CMP32i32); break; + case X86::CMP64ri32: SimplifyShortImmForm(OutMI, X86::CMP64i32); break; + case X86::OR8ri: SimplifyShortImmForm(OutMI, X86::OR8i8); break; + case X86::OR16ri: SimplifyShortImmForm(OutMI, X86::OR16i16); break; + case X86::OR32ri: SimplifyShortImmForm(OutMI, X86::OR32i32); break; + case X86::OR64ri32: SimplifyShortImmForm(OutMI, X86::OR64i32); break; + case X86::SBB8ri: SimplifyShortImmForm(OutMI, X86::SBB8i8); break; + case X86::SBB16ri: SimplifyShortImmForm(OutMI, X86::SBB16i16); break; + case X86::SBB32ri: SimplifyShortImmForm(OutMI, X86::SBB32i32); break; + case X86::SBB64ri32: SimplifyShortImmForm(OutMI, X86::SBB64i32); break; + case X86::SUB8ri: SimplifyShortImmForm(OutMI, X86::SUB8i8); break; + case X86::SUB16ri: SimplifyShortImmForm(OutMI, X86::SUB16i16); break; + case X86::SUB32ri: SimplifyShortImmForm(OutMI, X86::SUB32i32); break; + case X86::SUB64ri32: SimplifyShortImmForm(OutMI, X86::SUB64i32); break; + case X86::TEST8ri: SimplifyShortImmForm(OutMI, X86::TEST8i8); break; + case X86::TEST16ri: SimplifyShortImmForm(OutMI, X86::TEST16i16); break; + case X86::TEST32ri: SimplifyShortImmForm(OutMI, X86::TEST32i32); break; + case X86::TEST64ri32: SimplifyShortImmForm(OutMI, X86::TEST64i32); break; + case X86::XOR8ri: SimplifyShortImmForm(OutMI, X86::XOR8i8); break; + case X86::XOR16ri: SimplifyShortImmForm(OutMI, X86::XOR16i16); break; + case X86::XOR32ri: SimplifyShortImmForm(OutMI, X86::XOR32i32); break; + case X86::XOR64ri32: SimplifyShortImmForm(OutMI, X86::XOR64i32); break; } } @@ -346,7 +479,7 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, // cast away const; DIetc do not take const operands for some reason. 
DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); if (V.getContext().isSubprogram()) - O << DISubprogram(V.getContext().getNode()).getDisplayName() << ":"; + O << DISubprogram(V.getContext()).getDisplayName() << ":"; O << V.getName(); O << " <- "; // Frame address. Currently handles register +- offset only. diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/AsmPrinter/X86MCInstLower.h index ebd23f6..9e5474f 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.h +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.h @@ -25,7 +25,7 @@ namespace llvm { class X86Subtarget; /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. -class VISIBILITY_HIDDEN X86MCInstLower { +class LLVM_LIBRARY_VISIBILITY X86MCInstLower { MCContext &Ctx; Mangler *Mang; X86AsmPrinter &AsmPrinter; diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index da6bb91..1334820 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -39,7 +39,12 @@ set(sources if( CMAKE_CL_64 ) enable_language(ASM_MASM) - set(sources ${sources} X86CompilationCallback_Win64.asm) + ADD_CUSTOM_COMMAND( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj + COMMAND ${CMAKE_ASM_MASM_COMPILER} /Fo ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj /c ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm + ) + set(sources ${sources} ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj) endif() add_llvm_target(X86CodeGen ${sources}) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 62e7357..8a5a630 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -155,7 +155,57 @@ static void translateRegister(MCInst &mcInst, Reg reg) { /// /// @param mcInst - The MCInst to append to. /// @param immediate - The immediate value to append. -static void translateImmediate(MCInst &mcInst, uint64_t immediate) { +/// @param operand - The operand, as stored in the descriptor table. +/// @param insn - The internal instruction. +static void translateImmediate(MCInst &mcInst, + uint64_t immediate, + OperandSpecifier &operand, + InternalInstruction &insn) { + // Sign-extend the immediate if necessary. + + OperandType type = operand.type; + + if (type == TYPE_RELv) { + switch (insn.displacementSize) { + default: + break; + case 8: + type = TYPE_MOFFS8; + break; + case 16: + type = TYPE_MOFFS16; + break; + case 32: + type = TYPE_MOFFS32; + break; + case 64: + type = TYPE_MOFFS64; + break; + } + } + + switch (type) { + case TYPE_MOFFS8: + case TYPE_REL8: + if(immediate & 0x80) + immediate |= ~(0xffull); + break; + case TYPE_MOFFS16: + if(immediate & 0x8000) + immediate |= ~(0xffffull); + break; + case TYPE_MOFFS32: + case TYPE_REL32: + case TYPE_REL64: + if(immediate & 0x80000000) + immediate |= ~(0xffffffffull); + break; + case TYPE_MOFFS64: + default: + // operand is 64 bits wide. Do nothing. 
+ break; + } + mcInst.addOperand(MCOperand::CreateImm(immediate)); } @@ -370,8 +420,7 @@ static bool translateRM(MCInst &mcInst, case TYPE_XMM64: case TYPE_XMM128: case TYPE_DEBUGREG: - case TYPE_CR32: - case TYPE_CR64: + case TYPE_CONTROLREG: return translateRMRegister(mcInst, insn); case TYPE_M: case TYPE_M8: @@ -447,8 +496,10 @@ static bool translateOperand(MCInst &mcInst, case ENCODING_IO: case ENCODING_Iv: case ENCODING_Ia: - translateImmediate(mcInst, - insn.immediates[insn.numImmediatesTranslated++]); + translateImmediate(mcInst, + insn.immediates[insn.numImmediatesTranslated++], + operand, + insn); return false; case ENCODING_RB: case ENCODING_RW: diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index 64f6b2d..6c3ff6b 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -1034,14 +1034,10 @@ static int readModRM(struct InternalInstruction* insn) { if (index > 7) \ *valid = 0; \ return prefix##_DR0 + index; \ - case TYPE_CR32: \ - if (index > 7) \ - *valid = 0; \ - return prefix##_ECR0 + index; \ - case TYPE_CR64: \ + case TYPE_CONTROLREG: \ if (index > 8) \ *valid = 0; \ - return prefix##_RCR0 + index; \ + return prefix##_CR0 + index; \ } \ } diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 462cf68..28ba86b 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -225,26 +225,16 @@ extern "C" { ENTRY(DR6) \ ENTRY(DR7) -#define REGS_CONTROL_32BIT \ - ENTRY(ECR0) \ - ENTRY(ECR1) \ - ENTRY(ECR2) \ - ENTRY(ECR3) \ - ENTRY(ECR4) \ - ENTRY(ECR5) \ - ENTRY(ECR6) \ - ENTRY(ECR7) - -#define REGS_CONTROL_64BIT \ - ENTRY(RCR0) \ - ENTRY(RCR1) \ - ENTRY(RCR2) \ - ENTRY(RCR3) \ - ENTRY(RCR4) \ - ENTRY(RCR5) \ - ENTRY(RCR6) \ - ENTRY(RCR7) \ - ENTRY(RCR8) +#define REGS_CONTROL \ + ENTRY(CR0) \ + ENTRY(CR1) \ + ENTRY(CR2) \ + ENTRY(CR3) \ + ENTRY(CR4) \ + ENTRY(CR5) \ + ENTRY(CR6) \ + ENTRY(CR7) \ + ENTRY(CR8) #define ALL_EA_BASES \ EA_BASES_16BIT \ @@ -264,8 +254,7 @@ extern "C" { REGS_XMM \ REGS_SEGMENT \ REGS_DEBUG \ - REGS_CONTROL_32BIT \ - REGS_CONTROL_64BIT \ + REGS_CONTROL \ ENTRY(RIP) /* diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 4a7cd57..0f33f52 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -280,8 +280,7 @@ struct ContextDecision { ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \ ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \ ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \ - ENUM_ENTRY(TYPE_CR32, "4-byte control register operand") \ - ENUM_ENTRY(TYPE_CR64, "8-byte") \ + ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \ \ ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \ ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \ diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp index 5e80845..dab070e 100644 --- a/lib/Target/X86/SSEDomainFix.cpp +++ b/lib/Target/X86/SSEDomainFix.cpp @@ -155,9 +155,7 @@ char SSEDomainFixPass::ID = 0; /// Translate TRI register number to an index into our smaller tables of /// interesting registers. Return -1 for boring registers. int SSEDomainFixPass::RegIndex(unsigned reg) { - // Registers are sorted lexicographically. 
- // We just need them to be consecutive, ordering doesn't matter. - assert(X86::XMM9 == X86::XMM0+NumRegs-1 && "Unexpected sort"); + assert(X86::XMM15 == X86::XMM0+NumRegs-1 && "Unexpected sort"); reg -= X86::XMM0; return reg < NumRegs ? (int) reg : -1; } diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index ba9c1d0..151087f 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -12,6 +12,7 @@ #include "X86FixupKinds.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" @@ -43,20 +44,19 @@ public: X86AsmBackend(const Target &T) : TargetAsmBackend(T) {} - void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &DF, + void ApplyFixup(const MCFixup &Fixup, MCDataFragment &DF, uint64_t Value) const { - unsigned Size = 1 << getFixupKindLog2Size(Fixup.Kind); + unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind()); - assert(Fixup.Offset + Size <= DF.getContents().size() && + assert(Fixup.getOffset() + Size <= DF.getContents().size() && "Invalid fixup offset!"); for (unsigned i = 0; i != Size; ++i) - DF.getContents()[Fixup.Offset + i] = uint8_t(Value >> (i * 8)); + DF.getContents()[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); } - bool MayNeedRelaxation(const MCInst &Inst, - const SmallVectorImpl<MCAsmFixup> &Fixups) const; + bool MayNeedRelaxation(const MCInst &Inst) const; - void RelaxInstruction(const MCInstFragment *IF, MCInst &Res) const; + void RelaxInstruction(const MCInst &Inst, MCInst &Res) const; bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; }; @@ -75,6 +75,7 @@ static unsigned getRelaxedOpcode(unsigned Op) { case X86::JG_1: return X86::JG_4; case X86::JLE_1: return X86::JLE_4; case X86::JL_1: return X86::JL_4; + case X86::TAILJMP_1: case X86::JMP_1: return X86::JMP_4; case X86::JNE_1: return X86::JNE_4; case X86::JNO_1: return X86::JNO_4; @@ -86,35 +87,33 @@ static unsigned getRelaxedOpcode(unsigned Op) { } } -bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst, - const SmallVectorImpl<MCAsmFixup> &Fixups) const { - // Check for a 1byte pcrel fixup, and enforce that we would know how to relax - // this instruction. - for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { - if (unsigned(Fixups[i].Kind) == X86::reloc_pcrel_1byte) { - assert(getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode()); - return true; - } - } +bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const { + // Check if this instruction is ever relaxable. + if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode()) + return false; - return false; + // If so, just assume it can be relaxed. Once we support relaxing more complex + // instructions we should check that the instruction actually has symbolic + // operands before doing this, but we need to be careful about things like + // PCrel. + return true; } // FIXME: Can tblgen help at all here to verify there aren't other instructions // we can relax? -void X86AsmBackend::RelaxInstruction(const MCInstFragment *IF, - MCInst &Res) const { +void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel. 
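ApplyFixup() above is a little-endian byte patch over the fragment's contents; isolated for clarity (sketch, with the MCDataFragment reduced to a raw buffer):

```cpp
#include <cstddef>
#include <cstdint>

// Write Value little-endian into Size bytes at the fixup's offset.
static void patchLE(uint8_t *Contents, size_t Offset, unsigned Size,
                    uint64_t Value) {
  for (unsigned i = 0; i != Size; ++i)
    Contents[Offset + i] = uint8_t(Value >> (i * 8));
}
```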
- unsigned RelaxedOp = getRelaxedOpcode(IF->getInst().getOpcode()); + unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode()); - if (RelaxedOp == IF->getInst().getOpcode()) { + if (RelaxedOp == Inst.getOpcode()) { SmallString<256> Tmp; raw_svector_ostream OS(Tmp); - IF->getInst().dump_pretty(OS); + Inst.dump_pretty(OS); + OS << "\n"; report_fatal_error("unexpected instruction to relax: " + OS.str()); } - Res = IF->getInst(); + Res = Inst; Res.setOpcode(RelaxedOp); } @@ -199,6 +198,18 @@ public: } }; +class ELFX86_32AsmBackend : public ELFX86AsmBackend { +public: + ELFX86_32AsmBackend(const Target &T) + : ELFX86AsmBackend(T) {} +}; + +class ELFX86_64AsmBackend : public ELFX86AsmBackend { +public: + ELFX86_64AsmBackend(const Target &T) + : ELFX86AsmBackend(T) {} +}; + class DarwinX86AsmBackend : public X86AsmBackend { public: DarwinX86AsmBackend(const Target &T) @@ -210,7 +221,8 @@ public: bool isVirtualSection(const MCSection &Section) const { const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section); return (SMO.getType() == MCSectionMachO::S_ZEROFILL || - SMO.getType() == MCSectionMachO::S_GB_ZEROFILL); + SMO.getType() == MCSectionMachO::S_GB_ZEROFILL || + SMO.getType() == MCSectionMachO::S_THREAD_LOCAL_ZEROFILL); } }; @@ -247,6 +259,26 @@ public: const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section); return SMO.getType() == MCSectionMachO::S_CSTRING_LITERALS; } + + virtual bool isSectionAtomizable(const MCSection &Section) const { + const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section); + // Fixed sized data sections are uniqued, they cannot be diced into atoms. + switch (SMO.getType()) { + default: + return true; + + case MCSectionMachO::S_4BYTE_LITERALS: + case MCSectionMachO::S_8BYTE_LITERALS: + case MCSectionMachO::S_16BYTE_LITERALS: + case MCSectionMachO::S_LITERAL_POINTERS: + case MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS: + case MCSectionMachO::S_LAZY_SYMBOL_POINTERS: + case MCSectionMachO::S_MOD_INIT_FUNC_POINTERS: + case MCSectionMachO::S_MOD_TERM_FUNC_POINTERS: + case MCSectionMachO::S_INTERPOSING: + return false; + } + } }; } @@ -257,7 +289,7 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, case Triple::Darwin: return new DarwinX86_32AsmBackend(T); default: - return new ELFX86AsmBackend(T); + return new ELFX86_32AsmBackend(T); } } @@ -267,6 +299,6 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T, case Triple::Darwin: return new DarwinX86_64AsmBackend(T); default: - return new ELFX86AsmBackend(T); + return new ELFX86_64AsmBackend(T); } } diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h index eece462..98ab2a6 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.h +++ b/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -15,7 +15,7 @@ #define X86COFF_MACHINEMODULEINFO_H #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/DenseSet.h" #include "X86MachineFunctionInfo.h" namespace llvm { @@ -25,18 +25,18 @@ namespace llvm { /// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation /// for X86 COFF targets. 
class X86COFFMachineModuleInfo : public MachineModuleInfoImpl { - StringSet<> CygMingStubs; + DenseSet<MCSymbol const *> Externals; public: X86COFFMachineModuleInfo(const MachineModuleInfo &) {} virtual ~X86COFFMachineModuleInfo(); - void addExternalFunction(StringRef Name) { - CygMingStubs.insert(Name); + void addExternalFunction(MCSymbol* Symbol) { + Externals.insert(Symbol); } - typedef StringSet<>::const_iterator stub_iterator; - stub_iterator stub_begin() const { return CygMingStubs.begin(); } - stub_iterator stub_end() const { return CygMingStubs.end(); } + typedef DenseSet<MCSymbol const *>::const_iterator externals_iterator; + externals_iterator externals_begin() const { return Externals.begin(); } + externals_iterator externals_end() const { return Externals.end(); } }; diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index fd15efd..a5774e1 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -307,6 +307,20 @@ def CC_X86_32_FastCall : CallingConv<[ CCDelegateTo<CC_X86_32_Common> ]>; +def CC_X86_32_ThisCall : CallingConv<[ + // Promote i8/i16 arguments to i32. + CCIfType<[i8, i16], CCPromoteToType<i32>>, + + // The 'nest' parameter, if any, is passed in EAX. + CCIfNest<CCAssignToReg<[EAX]>>, + + // The first integer argument is passed in ECX + CCIfType<[i32], CCAssignToReg<[ECX]>>, + + // Otherwise, same as everything else. + CCDelegateTo<CC_X86_32_Common> +]>; + def CC_X86_32_FastCC : CallingConv<[ // Handles byval parameters. Note that we can't rely on the delegation // to CC_X86_32_Common for this because that happens after code that diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index ff9208c..1bc5eb7 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -180,6 +180,8 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC, if (CC == CallingConv::X86_FastCall) return CC_X86_32_FastCall; + else if (CC == CallingConv::X86_ThisCall) + return CC_X86_32_ThisCall; else if (CC == CallingConv::Fast) return CC_X86_32_FastCC; else if (CC == CallingConv::GHC) @@ -324,7 +326,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, unsigned &ResultReg) { - unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src); + unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, + Src, /*TODO: Kill=*/false); if (RR != 0) { ResultReg = RR; @@ -416,7 +419,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { (S == 1 || S == 2 || S == 4 || S == 8)) { // Scaled-index addressing. Scale = S; - IndexReg = getRegForGEPIndex(Op); + IndexReg = getRegForGEPIndex(Op).first; if (IndexReg == 0) return false; } else @@ -802,7 +805,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) { unsigned ResultReg = getRegForValue(I->getOperand(0)); if (ResultReg == 0) return false; // Set the high bits to zero. 
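CC_X86_32_ThisCall above encodes MSVC's thiscall convention: on 32-bit x86 the first integer argument (normally the implicit this pointer) travels in ECX, and the remaining arguments follow the common stack convention. GCC and Clang expose the convention as an attribute; a hypothetical illustration, only meaningful when targeting 32-bit x86:

```cpp
struct Widget {
  int Size;
};

// Free function forced into the convention the patch adds support for:
// 'This' arrives in ECX rather than on the stack.
int __attribute__((thiscall)) widgetSize(Widget *This) {
  return This->Size;
}
```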
- ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg); + ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false); if (ResultReg == 0) return false; UpdateValueMap(I, ResultReg); return true; @@ -913,7 +916,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) { const MachineInstr &MI = *RI; - if (MI.modifiesRegister(Reg)) { + if (MI.definesRegister(Reg)) { unsigned Src, Dst, SrcSR, DstSR; if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) { @@ -1019,14 +1022,14 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { unsigned Op1Reg = getRegForValue(I->getOperand(1)); if (Op1Reg == 0) return false; - TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC); + TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC, DL); // The shift instruction uses X86::CL. If we defined a super-register // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what // we're doing here. if (CReg != X86::CL) BuildMI(MBB, DL, TII.get(TargetOpcode::EXTRACT_SUBREG), X86::CL) - .addReg(CReg).addImm(X86::SUBREG_8BIT); + .addReg(CReg).addImm(X86::sub_8bit); unsigned ResultReg = createResultReg(RC); BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg); @@ -1133,7 +1136,8 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) { // Then issue an extract_subreg. unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8, - CopyReg, X86::SUBREG_8BIT); + CopyReg, /*Kill=*/true, + X86::sub_8bit); if (!ResultReg) return false; @@ -1436,7 +1440,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { } case CCValAssign::BCvt: { unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(), - ISD::BIT_CONVERT, Arg); + ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false); assert(BC != 0 && "Failed to emit a bitcast!"); Arg = BC; ArgVT = VA.getLocVT(); @@ -1447,7 +1451,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (VA.isRegLoc()) { TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT); bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(), - Arg, RC, RC); + Arg, RC, RC, DL); assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; Emitted = true; RegArgs.push_back(VA.getLocReg()); @@ -1473,7 +1477,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (Subtarget->isPICStyleGOT()) { TargetRegisterClass *RC = X86::GR32RegisterClass; unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF); - bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC); + bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC, + DL); assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; Emitted = true; } @@ -1552,7 +1557,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { unsigned ResultReg = createResultReg(DstRC); bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - RVLocs[0].getLocReg(), DstRC, SrcRC); + RVLocs[0].getLocReg(), DstRC, SrcRC, DL); assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; Emitted = true; if (CopyVT != RVLocs[0].getValVT()) { diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp index 541083f..747683d 100644 --- a/lib/Target/X86/X86FloatingPointRegKill.cpp +++ b/lib/Target/X86/X86FloatingPointRegKill.cpp @@ -14,7 +14,6 @@ #define DEBUG_TYPE "x86-codegen" #include "X86.h" #include "X86InstrInfo.h" -#include "X86Subtarget.h" #include "llvm/Instructions.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include 
"llvm/CodeGen/MachineInstrBuilder.h" @@ -42,12 +41,70 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "X86 FP_REG_KILL inserter"; } + virtual const char *getPassName() const { + return "X86 FP_REG_KILL inserter"; + } }; char FPRegKiller::ID = 0; } -FunctionPass *llvm::createX87FPRegKillInserterPass() { return new FPRegKiller(); } +FunctionPass *llvm::createX87FPRegKillInserterPass() { + return new FPRegKiller(); +} + +/// isFPStackVReg - Return true if the specified vreg is from a fp stack +/// register class. +static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(RegNo)) + return false; + + switch (MRI.getRegClass(RegNo)->getID()) { + default: return false; + case X86::RFP32RegClassID: + case X86::RFP64RegClassID: + case X86::RFP80RegClassID: + return true; + } +} + + +/// ContainsFPStackCode - Return true if the specific MBB has floating point +/// stack code, and thus needs an FP_REG_KILL. +static bool ContainsFPStackCode(MachineBasicBlock *MBB, + const MachineRegisterInfo &MRI) { + // Scan the block, looking for instructions that define fp stack vregs. + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + if (I->getNumOperands() == 0 || !I->getOperand(0).isReg()) + continue; + + for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { + if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef()) + continue; + + if (isFPStackVReg(I->getOperand(op).getReg(), MRI)) + return true; + } + } + + // Check PHI nodes in successor blocks. These PHI's will be lowered to have + // a copy of the input value in this block, which is a definition of the + // value. + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + E = MBB->succ_end(); SI != E; ++ SI) { + MachineBasicBlock *SuccBB = *SI; + for (MachineBasicBlock::iterator I = SuccBB->begin(), E = SuccBB->end(); + I != E; ++I) { + // All PHI nodes are at the top of the block. + if (!I->isPHI()) break; + + if (isFPStackVReg(I->getOperand(0).getReg(), MRI)) + return true; + } + } + + return false; +} bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { // If we are emitting FP stack code, scan the basic block to determine if this @@ -65,14 +122,13 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { // Fast-path: If nothing is using the x87 registers, we don't need to do // any scanning. 
- MachineRegisterInfo &MRI = MF.getRegInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() && MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() && MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty()) return false; bool Changed = false; - const X86Subtarget &Subtarget = MF.getTarget().getSubtarget<X86Subtarget>(); MachineFunction::iterator MBBI = MF.begin(); MachineFunction::iterator EndMBB = MF.end(); for (; MBBI != EndMBB; ++MBBI) { @@ -87,48 +143,8 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { continue; } - bool ContainsFPCode = false; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - !ContainsFPCode && I != E; ++I) { - if (I->getNumOperands() != 0 && I->getOperand(0).isReg()) { - const TargetRegisterClass *clas; - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { - if (I->getOperand(op).isReg() && I->getOperand(op).isDef() && - TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) && - ((clas = MRI.getRegClass(I->getOperand(op).getReg())) == - X86::RFP32RegisterClass || - clas == X86::RFP64RegisterClass || - clas == X86::RFP80RegisterClass)) { - ContainsFPCode = true; - break; - } - } - } - } - // Check PHI nodes in successor blocks. These PHI's will be lowered to have - // a copy of the input value in this block. In SSE mode, we only care about - // 80-bit values. - if (!ContainsFPCode) { - // Final check, check LLVM BB's that are successors to the LLVM BB - // corresponding to BB for FP PHI nodes. - const BasicBlock *LLVMBB = MBB->getBasicBlock(); - const PHINode *PN; - for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB); - !ContainsFPCode && SI != E; ++SI) { - for (BasicBlock::const_iterator II = SI->begin(); - (PN = dyn_cast<PHINode>(II)); ++II) { - if (PN->getType()==Type::getX86_FP80Ty(LLVMBB->getContext()) || - (!Subtarget.hasSSE1() && PN->getType()->isFloatingPointTy()) || - (!Subtarget.hasSSE2() && - PN->getType()==Type::getDoubleTy(LLVMBB->getContext()))) { - ContainsFPCode = true; - break; - } - } - } - } - // Finally, if we found any FP code, emit the FP_REG_KILL instruction. - if (ContainsFPCode) { + // If we find any FP stack code, emit the FP_REG_KILL instruction. + if (ContainsFPStackCode(MBB, MRI)) { BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc(), MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL)); ++NumFPKill; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index fd8bb1e..0f64383 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1693,7 +1693,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { Result, CurDAG->getTargetConstant(8, MVT::i8)), 0); // Then truncate it down to i8. - Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, + Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result); } else { Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, @@ -1834,7 +1834,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { CurDAG->getTargetConstant(8, MVT::i8)), 0); // Then truncate it down to i8. - Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, + Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result); } else { Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, @@ -1883,7 +1883,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } // Extract the l-register. 
- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Reg); // Emit a testb. @@ -1912,7 +1912,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { Reg.getValueType(), Reg, RC), 0); // Extract the h-register. - SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT_HI, dl, + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, Reg); // Emit a testb. No special NOREX tricks are needed since there's @@ -1930,7 +1930,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Reg = N0.getNode()->getOperand(0); // Extract the 16-bit subregister. - SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_16BIT, dl, + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, MVT::i16, Reg); // Emit a testw. @@ -1946,7 +1946,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Reg = N0.getNode()->getOperand(0); // Extract the 32-bit subregister. - SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_32BIT, dl, + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl, MVT::i32, Reg); // Emit a testl. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6ce9ab7..b02c33d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -94,7 +94,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // X86 is weird, it always uses i8 for shift amounts and setcc results. setShiftAmountType(MVT::i8); setBooleanContents(ZeroOrOneBooleanContent); - setSchedulingPreference(SchedulingForRegPressure); + setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(X86StackPtr); if (Subtarget->isTargetDarwin()) { @@ -145,13 +145,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand); } else if (!UseSoftFloat) { - if (X86ScalarSSEf64) { - // We have an impenetrably clever algorithm for ui64->double only. - setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); - } + // We have an algorithm for SSE2->double, and we turn this into a + // 64-bit FILD followed by conditional FADD for other targets. + setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); // We have an algorithm for SSE2, and we turn this into a 64-bit // FILD for other targets. - setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); + setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); } // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have @@ -215,9 +214,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } // TODO: when we have SSE, these could be more efficient, by using movd/movq. - if (!X86ScalarSSEf64) { + if (!X86ScalarSSEf64) { setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand); setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); + if (Subtarget->is64Bit()) { + setOperationAction(ISD::BIT_CONVERT , MVT::f64 , Expand); + // Without SSE, i64->f64 goes through memory; i64->MMX is Legal. 
+ if (Subtarget->hasMMX() && !DisableMMX) + setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Custom); + else + setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand); + } } // Scalar integer divide and remainder are lowered to use operations that @@ -679,6 +686,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSETCC, MVT::v8i8, Custom); setOperationAction(ISD::VSETCC, MVT::v4i16, Custom); setOperationAction(ISD::VSETCC, MVT::v2i32, Custom); + + if (!X86ScalarSSEf64 && Subtarget->is64Bit()) { + setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom); + setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom); + setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom); + setOperationAction(ISD::BIT_CONVERT, MVT::v2f32, Custom); + setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom); + } } if (!UseSoftFloat && Subtarget->hasSSE1()) { @@ -1244,10 +1259,8 @@ X86TargetLowering::LowerReturn(SDValue Chain, MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); unsigned Reg = FuncInfo->getSRetReturnReg(); - if (!Reg) { - Reg = MRI.createVirtualRegister(getRegClassFor(MVT::i64)); - FuncInfo->setSRetReturnReg(Reg); - } + assert(Reg && + "SRetReturnReg should have been set in LowerFormalArguments()."); SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag); @@ -1384,6 +1397,8 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, return !Subtarget->is64Bit(); case CallingConv::X86_FastCall: return !Subtarget->is64Bit(); + case CallingConv::X86_ThisCall: + return !Subtarget->is64Bit(); case CallingConv::Fast: return GuaranteedTailCallOpt; case CallingConv::GHC: @@ -1405,6 +1420,8 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { if (CC == CallingConv::X86_FastCall) return CC_X86_32_FastCall; + else if (CC == CallingConv::X86_ThisCall) + return CC_X86_32_ThisCall; else if (CC == CallingConv::Fast) return CC_X86_32_FastCC; else if (CC == CallingConv::GHC) @@ -1596,7 +1613,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { - if (Is64Bit || CallConv != CallingConv::X86_FastCall) { + if (Is64Bit || (CallConv != CallingConv::X86_FastCall && + CallConv != CallingConv::X86_ThisCall)) { FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true, false)); } @@ -1716,7 +1734,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (!Is64Bit) { // RegSaveFrameIndex is X86-64 only. FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); - if (CallConv == CallingConv::X86_FastCall) + if (CallConv == CallingConv::X86_FastCall || + CallConv == CallingConv::X86_ThisCall) // fastcc functions can't have varargs. FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); } @@ -5272,7 +5291,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, } // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. 
- MFI->setHasCalls(true); + MFI->setAdjustsStack(true); SDValue Flag = Chain.getValue(1); return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag); @@ -5462,7 +5481,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, } SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, - SDValue StackSlot, + SDValue StackSlot, SelectionDAG &DAG) const { // Build the FILD DebugLoc dl = Op.getDebugLoc(); @@ -5636,35 +5655,72 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDValue N0 = Op.getOperand(0); DebugLoc dl = Op.getDebugLoc(); - // Now not UINT_TO_FP is legal (it's marked custom), dag combiner won't + // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform // the optimization here. if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0); EVT SrcVT = N0.getValueType(); - if (SrcVT == MVT::i64) { - // We only handle SSE2 f64 target here; caller can expand the rest. - if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64) - return SDValue(); - + EVT DstVT = Op.getValueType(); + if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64) return LowerUINT_TO_FP_i64(Op, DAG); - } else if (SrcVT == MVT::i32 && X86ScalarSSEf64) { + else if (SrcVT == MVT::i32 && X86ScalarSSEf64) return LowerUINT_TO_FP_i32(Op, DAG); - } - - assert(SrcVT == MVT::i32 && "Unknown UINT_TO_FP to lower!"); // Make a 64-bit buffer, and use it to build an FILD. SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64); - SDValue WordOff = DAG.getConstant(4, getPointerTy()); - SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, - getPointerTy(), StackSlot, WordOff); - SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), + if (SrcVT == MVT::i32) { + SDValue WordOff = DAG.getConstant(4, getPointerTy()); + SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, + getPointerTy(), StackSlot, WordOff); + SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), + StackSlot, NULL, 0, false, false, 0); + SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), + OffsetSlot, NULL, 0, false, false, 0); + SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); + return Fild; + } + + assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP"); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, NULL, 0, false, false, 0); - SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), - OffsetSlot, NULL, 0, false, false, 0); - return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); + // For i64 source, we need to add the appropriate power of 2 if the input + // was negative. This is the same as the optimization in + // DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here, + // we must be careful to do the computation in x87 extended precision, not + // in SSE. (The generic code can't know it's OK to do this, or how to.) + SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); + SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) }; + SDValue Fild = DAG.getNode(X86ISD::FILD, dl, Tys, Ops, 3); + + APInt FF(32, 0x5F800000ULL); + + // Check whether the sign bit is set. + SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), + Op.getOperand(0), DAG.getConstant(0, MVT::i64), + ISD::SETLT); + + // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. 
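The i64 UINT_TO_FP path being built here rests on a classic identity: FILD reads the 64 bits as signed, so for inputs with the top bit set the signed reading is off by exactly 2^64, and the constant pool pairs 0.0f with 0x5F800000 (2^64 as an IEEE single) so the sign test selects the correction. In scalar form (a sketch of the identity only; the patch performs the add in 80-bit x87 precision precisely because doing it in double would double-round):

```cpp
#include <cstdint>

static double u64ToF64(uint64_t X) {
  double D = static_cast<double>(static_cast<int64_t>(X)); // the FILD view
  if (static_cast<int64_t>(X) < 0)
    D += 18446744073709551616.0;                           // + 2^64
  return D;
}
```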
+ SDValue FudgePtr = DAG.getConstantPool( + ConstantInt::get(*DAG.getContext(), FF.zext(64)), + getPointerTy()); + + // Get a pointer to FF if the sign bit was set, or to 0 otherwise. + SDValue Zero = DAG.getIntPtrConstant(0); + SDValue Four = DAG.getIntPtrConstant(4); + SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, + Zero, Four); + FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset); + + // Load the value out, extending it from f32 to f80. + // FIXME: Avoid the extend by constructing the right constant pool? + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), + FudgePtr, PseudoSourceValue::getConstantPool(), + 0, MVT::f32, false, false, 4); + // Extend everything to 80 bits to force it to be done on x87. + SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge); + return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0)); } std::pair<SDValue,SDValue> X86TargetLowering:: @@ -6593,221 +6649,6 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, return DAG.getMergeValues(Ops1, 2, dl); } -SDValue -X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, - const Value *DstSV, - uint64_t DstSVOff) const { - ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); - - // If not DWORD aligned or size is more than the threshold, call the library. - // The libc version is likely to be faster for these cases. It can use the - // address value and run time information about the CPU. - if ((Align & 3) != 0 || - !ConstantSize || - ConstantSize->getZExtValue() > - getSubtarget()->getMaxInlineSizeThreshold()) { - SDValue InFlag(0, 0); - - // Check to see if there is a specialized entry-point for memory zeroing. - ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); - - if (const char *bzeroEntry = V && - V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { - EVT IntPtr = getPointerTy(); - const Type *IntPtrTy = TD->getIntPtrType(*DAG.getContext()); - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Dst; - Entry.Ty = IntPtrTy; - Args.push_back(Entry); - Entry.Node = Size; - Args.push_back(Entry); - std::pair<SDValue,SDValue> CallResult = - LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), - false, false, false, false, - 0, CallingConv::C, false, /*isReturnValueUsed=*/false, - DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl); - return CallResult.second; - } - - // Otherwise have the target-independent code call memset. - return SDValue(); - } - - uint64_t SizeVal = ConstantSize->getZExtValue(); - SDValue InFlag(0, 0); - EVT AVT; - SDValue Count; - ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); - unsigned BytesLeft = 0; - bool TwoRepStos = false; - if (ValC) { - unsigned ValReg; - uint64_t Val = ValC->getZExtValue() & 255; - - // If the value is a constant, then we can potentially use larger sets. 
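The constant-value widening that this (now deleted) memset fast path performed, via the `Val = (Val << 8) | Val` chain in the switch below, is ordinary byte splatting. A hedged sketch under an illustrative name:

    #include <cstdint>
    // Replicate the low byte across a wider store unit so one REP STOS
    // iteration writes 2, 4, or 8 identical bytes at once.
    uint64_t splat_byte(uint64_t v) {
      v &= 0xff;
      v |= v << 8;    // 16-bit pattern for WORD-aligned rep stosw
      v |= v << 16;   // 32-bit pattern for DWORD-aligned rep stosd
      v |= v << 32;   // 64-bit pattern for QWORD-aligned rep stosq
      return v;
    }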
- switch (Align & 3) { - case 2: // WORD aligned - AVT = MVT::i16; - ValReg = X86::AX; - Val = (Val << 8) | Val; - break; - case 0: // DWORD aligned - AVT = MVT::i32; - ValReg = X86::EAX; - Val = (Val << 8) | Val; - Val = (Val << 16) | Val; - if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned - AVT = MVT::i64; - ValReg = X86::RAX; - Val = (Val << 32) | Val; - } - break; - default: // Byte aligned - AVT = MVT::i8; - ValReg = X86::AL; - Count = DAG.getIntPtrConstant(SizeVal); - break; - } - - if (AVT.bitsGT(MVT::i8)) { - unsigned UBytes = AVT.getSizeInBits() / 8; - Count = DAG.getIntPtrConstant(SizeVal / UBytes); - BytesLeft = SizeVal % UBytes; - } - - Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT), - InFlag); - InFlag = Chain.getValue(1); - } else { - AVT = MVT::i8; - Count = DAG.getIntPtrConstant(SizeVal); - Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag); - InFlag = Chain.getValue(1); - } - - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : - X86::ECX, - Count, InFlag); - InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : - X86::EDI, - Dst, InFlag); - InFlag = Chain.getValue(1); - - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; - Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); - - if (TwoRepStos) { - InFlag = Chain.getValue(1); - Count = Size; - EVT CVT = Count.getValueType(); - SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, - DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); - Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : - X86::ECX, - Left, InFlag); - InFlag = Chain.getValue(1); - Tys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; - Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); - } else if (BytesLeft) { - // Handle the last 1 - 7 bytes. - unsigned Offset = SizeVal - BytesLeft; - EVT AddrVT = Dst.getValueType(); - EVT SizeVT = Size.getValueType(); - - Chain = DAG.getMemset(Chain, dl, - DAG.getNode(ISD::ADD, dl, AddrVT, Dst, - DAG.getConstant(Offset, AddrVT)), - Src, - DAG.getConstant(BytesLeft, SizeVT), - Align, isVolatile, DstSV, DstSVOff + Offset); - } - - // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. - return Chain; -} - -SDValue -X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const { - // This requires the copy size to be a constant, preferrably - // within a subtarget-specific limit. - ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); - if (!ConstantSize) - return SDValue(); - uint64_t SizeVal = ConstantSize->getZExtValue(); - if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) - return SDValue(); - - /// If not DWORD aligned, call the library. - if ((Align & 3) != 0) - return SDValue(); - - // DWORD aligned - EVT AVT = MVT::i32; - if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned - AVT = MVT::i64; - - unsigned UBytes = AVT.getSizeInBits() / 8; - unsigned CountVal = SizeVal / UBytes; - SDValue Count = DAG.getIntPtrConstant(CountVal); - unsigned BytesLeft = SizeVal % UBytes; - - SDValue InFlag(0, 0); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? 
X86::RCX : - X86::ECX, - Count, InFlag); - InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : - X86::EDI, - Dst, InFlag); - InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI : - X86::ESI, - Src, InFlag); - InFlag = Chain.getValue(1); - - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; - SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops, - array_lengthof(Ops)); - - SmallVector<SDValue, 4> Results; - Results.push_back(RepMovs); - if (BytesLeft) { - // Handle the last 1 - 7 bytes. - unsigned Offset = SizeVal - BytesLeft; - EVT DstVT = Dst.getValueType(); - EVT SrcVT = Src.getValueType(); - EVT SizeVT = Size.getValueType(); - Results.push_back(DAG.getMemcpy(Chain, dl, - DAG.getNode(ISD::ADD, dl, DstVT, Dst, - DAG.getConstant(Offset, DstVT)), - DAG.getNode(ISD::ADD, dl, SrcVT, Src, - DAG.getConstant(Offset, SrcVT)), - DAG.getConstant(BytesLeft, SizeVT), - Align, isVolatile, AlwaysInline, - DstSV, DstSVOff + Offset, - SrcSV, SrcSVOff + Offset)); - } - - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Results[0], Results.size()); -} - SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); @@ -7138,6 +6979,9 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setReturnAddressIsTaken(true); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); @@ -7161,6 +7005,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); @@ -7298,6 +7143,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, break; } case CallingConv::X86_FastCall: + case CallingConv::X86_ThisCall: case CallingConv::Fast: // Pass 'nest' parameter in EAX. // Must be kept in sync with X86CallingConv.td @@ -7630,6 +7476,27 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op, return DAG.getMergeValues(Ops, 2, dl); } +SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op, + SelectionDAG &DAG) const { + EVT SrcVT = Op.getOperand(0).getValueType(); + EVT DstVT = Op.getValueType(); + assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() && + Subtarget->hasMMX() && !DisableMMX) && + "Unexpected custom BIT_CONVERT"); + assert((DstVT == MVT::i64 || + (DstVT.isVector() && DstVT.getSizeInBits()==64)) && + "Unexpected custom BIT_CONVERT"); + // i64 <=> MMX conversions are Legal. + if (SrcVT==MVT::i64 && DstVT.isVector()) + return Op; + if (DstVT==MVT::i64 && SrcVT.isVector()) + return Op; + // MMX <=> MMX conversions are Legal. + if (SrcVT.isVector() && DstVT.isVector()) + return Op; + // All other conversions need to be expanded. 
+ return SDValue(); +} SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); @@ -7699,6 +7566,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SMULO: case ISD::UMULO: return LowerXALUO(Op, DAG); case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG); + case ISD::BIT_CONVERT: return LowerBIT_CONVERT(Op, DAG); } } @@ -8203,9 +8071,15 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, MachineOperand& dest1Oper = bInstr->getOperand(0); MachineOperand& dest2Oper = bInstr->getOperand(1); MachineOperand* argOpers[2 + X86AddrNumOperands]; - for (int i=0; i < 2 + X86AddrNumOperands; ++i) + for (int i=0; i < 2 + X86AddrNumOperands; ++i) { argOpers[i] = &bInstr->getOperand(i+2); + // We use some of the operands multiple times, so conservatively just + // clear any kill flags that might be present. + if (argOpers[i]->isReg() && argOpers[i]->isUse()) + argOpers[i]->setIsKill(false); + } + // x86 address has 5 operands: base, index, scale, displacement, and segment. int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 440601f9..1ef1a7b 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -563,7 +563,7 @@ namespace llvm { return !X86ScalarSSEf64 || VT == MVT::f80; } - virtual const X86Subtarget* getSubtarget() const { + const X86Subtarget* getSubtarget() const { return Subtarget; } @@ -677,6 +677,7 @@ namespace llvm { SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, SelectionDAG &DAG) const; + SDValue LowerBIT_CONVERT(SDValue op, SelectionDAG &DAG) const; SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; @@ -743,23 +744,6 @@ namespace llvm { void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, unsigned NewOp) const; - SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, - const Value *DstSV, - uint64_t DstSVOff) const; - SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const; - /// Utility function to emit string processing sse4.2 instructions /// that return in xmm0. /// This takes the instruction to expand, the associated machine basic diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index f5c3dbf..97eb17c 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -18,7 +18,9 @@ // // 64-bits but only 32 bits are significant. -def i64i32imm : Operand<i64>; +def i64i32imm : Operand<i64> { + let ParserMatchClass = ImmSExti64i32AsmOperand; +} // 64-bits but only 32 bits are significant, and those bits are treated as being // pc relative. @@ -30,7 +32,7 @@ def i64i32imm_pcrel : Operand<i64> { // 64-bits but only 8 bits are significant. 
def i64i8imm : Operand<i64> { - let ParserMatchClass = ImmSExt8AsmOperand; + let ParserMatchClass = ImmSExti64i8AsmOperand; } // Special i64mem for addresses of load folding tail calls. These are not @@ -198,6 +200,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64_TC:$dst, i32imm:$offset, variable_ops), "#TC_RETURN $dst $offset", []>; + let mayLoad = 1 in def TCRETURNmi64 : I<0, Pseudo, (outs), (ins i64mem_TC:$dst, i32imm:$offset, variable_ops), "#TC_RETURN $dst $offset", []>; @@ -208,6 +211,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops), "jmp{q}\t{*}$dst # TAILCALL", []>; + let mayLoad = 1 in def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops), "jmp{q}\t{*}$dst # TAILCALL", []>; } @@ -241,6 +245,7 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS; +let mayLoad = 1 in def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS; @@ -267,14 +272,16 @@ def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), "push{q}\t$imm", []>; def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), "push{q}\t$imm", []>; -def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), +def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm), "push{q}\t$imm", []>; } -let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1 in -def POPFQ : I<0x9D, RawFrm, (outs), (ins), "popf{q}", []>, REX_W; -let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1 in -def PUSHFQ64 : I<0x9C, RawFrm, (outs), (ins), "pushf{q}", []>; +let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in +def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>, + Requires<[In64BitMode]>; +let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in +def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>, + Requires<[In64BitMode]>; def LEA64_32r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_32mem:$src), @@ -309,16 +316,22 @@ def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), } // Defs = [EFLAGS] // Repeat string ops -let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI] in +let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}", [(X86rep_movs i64)]>, REP; -let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI] in +let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}", [(X86rep_stos i64)]>, REP; -def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scas{q}", []>; +let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in +def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>; + +let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in +def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>; -def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmps{q}", []>; +def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>; + +def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>; // Fast system-call instructions def SYSEXIT64 : RI<0x35, RawFrm, @@ -341,8 +354,17 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), [(set GR64:$dst, i64immSExt32:$src)]>; } +// The assembler accepts movq of a 64-bit immediate as an alternate spelling of +// movabsq. 
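An encoding-level view of the alternate spelling this comment describes: both mnemonics denote the single REX.W-prefixed B8+rd form with a full 64-bit immediate, so MOV64ri_alt (defined next) only has to teach the parser the "movq" name. A hedged sketch with an arbitrary illustrative immediate in RAX:

    // movabsq $0x123456789abcdef0, %rax == movq $0x123456789abcdef0, %rax
    static const unsigned char MovAbsRAX[] = {
      0x48, 0xB8,                                      // REX.W + (B8+rd), rd = rax
      0xF0, 0xDE, 0xBC, 0x9A, 0x78, 0x56, 0x34, 0x12,  // imm64, little-endian
    };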
+let isAsmParserOnly = 1 in { +def MOV64ri_alt : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>; +} + +let isCodeGenOnly = 1 in { def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>; +} let canFoldAsLoad = 1, isReMaterializable = 1 in def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), @@ -398,9 +420,9 @@ def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; // Moves to and from control registers -def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG_64:$src), +def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; -def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_64:$dst), (ins GR64:$src), +def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; // Sign/Zero extenders @@ -478,7 +500,7 @@ def def32 : PatLeaf<(i32 GR32:$src), [{ // In the case of a 32-bit def that is known to implicitly zero-extend, // we can use a SUBREG_TO_REG. def : Pat<(i64 (zext def32:$src)), - (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>; + (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>; let neverHasSideEffects = 1 in { let Defs = [RAX], Uses = [EAX] in @@ -496,7 +518,7 @@ let neverHasSideEffects = 1 in { let Defs = [EFLAGS] in { -def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i32imm:$src), +def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i64i32imm:$src), "add{q}\t{$src, %rax|%rax, $src}", []>; let isTwoAddress = 1 in { @@ -555,7 +577,7 @@ def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2), let Uses = [EFLAGS] in { -def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i32imm:$src), +def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i64i32imm:$src), "adc{q}\t{$src, %rax|%rax, $src}", []>; let isTwoAddress = 1 in { @@ -565,9 +587,11 @@ def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst), "adc{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>; +let isCodeGenOnly = 1 in { def ADC64rr_REV : RI<0x13, MRMSrcReg , (outs GR32:$dst), (ins GR64:$src1, GR64:$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", []>; +} def ADC64rm : RI<0x13, MRMSrcMem , (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), @@ -605,9 +629,11 @@ def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst), [(set GR64:$dst, EFLAGS, (X86sub_flag GR64:$src1, GR64:$src2))]>; +let isCodeGenOnly = 1 in { def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "sub{q}\t{$src2, $dst|$dst, $src2}", []>; +} // Register-Memory Subtraction def SUB64rm : RI<0x2B, MRMSrcMem, (outs GR64:$dst), @@ -629,7 +655,7 @@ def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst), (X86sub_flag GR64:$src1, i64immSExt32:$src2))]>; } // isTwoAddress -def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i32imm:$src), +def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i64i32imm:$src), "sub{q}\t{$src, %rax|%rax, $src}", []>; // Memory-Register Subtraction @@ -657,9 +683,11 @@ def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst), "sbb{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>; +let isCodeGenOnly = 1 in { def SBB64rr_REV : RI<0x1B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "sbb{q}\t{$src2, $dst|$dst, $src2}", []>; +} def SBB64rm : RI<0x1B, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), @@ -676,7 +704,7 @@ def SBB64ri32 : 
RIi32<0x81, MRM3r, (outs GR64:$dst), [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>; } // isTwoAddress -def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i32imm:$src), +def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i64i32imm:$src), "sbb{q}\t{$src, %rax|%rax, $src}", []>; def SBB64mr : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), @@ -1076,7 +1104,7 @@ def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", [(store (not (loadi64 addr:$dst)), addr:$dst)]>; let Defs = [EFLAGS] in { -def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i32imm:$src), +def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src), "and{q}\t{$src, %rax|%rax, $src}", []>; let isTwoAddress = 1 in { @@ -1086,9 +1114,11 @@ def AND64rr : RI<0x21, MRMDestReg, "and{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86and_flag GR64:$src1, GR64:$src2))]>; +let isCodeGenOnly = 1 in { def AND64rr_REV : RI<0x23, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "and{q}\t{$src2, $dst|$dst, $src2}", []>; +} def AND64rm : RI<0x23, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "and{q}\t{$src2, $dst|$dst, $src2}", @@ -1129,9 +1159,11 @@ def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst), "or{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86or_flag GR64:$src1, GR64:$src2))]>; +let isCodeGenOnly = 1 in { def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", []>; +} def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", @@ -1162,7 +1194,7 @@ def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src), [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), (implicit EFLAGS)]>; -def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i32imm:$src), +def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src), "or{q}\t{$src, %rax|%rax, $src}", []>; let isTwoAddress = 1 in { @@ -1172,9 +1204,11 @@ def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst), "xor{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86xor_flag GR64:$src1, GR64:$src2))]>; +let isCodeGenOnly = 1 in { def XOR64rr_REV : RI<0x33, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "xor{q}\t{$src2, $dst|$dst, $src2}", []>; +} def XOR64rm : RI<0x33, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "xor{q}\t{$src2, $dst|$dst, $src2}", @@ -1205,7 +1239,7 @@ def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src), [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), (implicit EFLAGS)]>; -def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src), +def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i64i32imm:$src), "xor{q}\t{$src, %rax|%rax, $src}", []>; } // Defs = [EFLAGS] @@ -1216,7 +1250,7 @@ def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src), // Integer comparison let Defs = [EFLAGS] in { -def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i32imm:$src), +def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i64i32imm:$src), "test{q}\t{$src, %rax|%rax, $src}", []>; let isCommutable = 1 in def TEST64rr : RI<0x85, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), @@ -1238,7 +1272,7 @@ def TEST64mi32 : RIi32<0xF7, MRM0m, (outs), i64immSExt32:$src2), 0))]>; -def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i32imm:$src), +def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i64i32imm:$src), "cmp{q}\t{$src, %rax|%rax, $src}", []>; def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, 
GR64:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", @@ -1293,7 +1327,8 @@ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), def BT64ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB; + [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB, + REX_W; // Note that these instructions don't need FastBTMem because that // only applies when the other operand is in a register. When it's // an immediate, bt is still fast. @@ -1714,11 +1749,13 @@ def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src), def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; +let mayLoad = 1, mayStore = 1 in def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; +let mayLoad = 1, mayStore = 1 in def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; @@ -1730,7 +1767,7 @@ def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src), "xchg{q}\t{$src, %rax|%rax, $src}", []>; // Optimized codegen when the non-memory output is not used. -let Defs = [EFLAGS] in { +let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in { // FIXME: Use normal add / sub instructions and add lock prefix dynamically. def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "lock\n\t" @@ -1982,14 +2019,14 @@ def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>; // defined after an extload. def : Pat<(extloadi64i32 addr:$src), (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), - x86_subreg_32bit)>; + sub_32bit)>; // anyext. Define these to do an explicit zero-extend to // avoid partial-register updates. 
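The anyext comment above is worth one concrete illustration before the patterns that follow. A hedged sketch of the partial-register concern (the function name is illustrative):

    #include <cstdint>
    // A plain 8-bit move (mov al, ...) merges with the old contents of the
    // full register and creates a false dependence; a zero-extending MOVZX
    // writes the whole register and breaks it. The lowering therefore treats
    // "use the low byte, rest is don't-care" as a full zero-extension:
    uint64_t anyext_u8(uint8_t b) { return (uint64_t)b; }  // typically a movzx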
def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>; def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>; def : Pat<(i64 (anyext GR32:$src)), - (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>; + (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>; //===----------------------------------------------------------------------===// // Some peepholes @@ -2016,54 +2053,54 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm), (SUBREG_TO_REG (i64 0), (AND32ri - (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit), + (EXTRACT_SUBREG GR64:$src, sub_32bit), (i32 (GetLo32XForm imm:$imm))), - x86_subreg_32bit)>; + sub_32bit)>; // r & (2^32-1) ==> movz def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), - (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>; + (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>; // r & (2^16-1) ==> movz def : Pat<(and GR64:$src, 0xffff), - (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>; + (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>; // r & (2^8-1) ==> movz def : Pat<(and GR64:$src, 0xff), - (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>; + (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>; // r & (2^8-1) ==> movz def : Pat<(and GR32:$src1, 0xff), - (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit))>, + (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>, Requires<[In64BitMode]>; // r & (2^8-1) ==> movz def : Pat<(and GR16:$src1, 0xff), - (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, x86_subreg_8bit)))>, + (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>, Requires<[In64BitMode]>; // sext_inreg patterns def : Pat<(sext_inreg GR64:$src, i32), - (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>; + (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>; def : Pat<(sext_inreg GR64:$src, i16), - (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>; + (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>; def : Pat<(sext_inreg GR64:$src, i8), - (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit))>; + (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>; def : Pat<(sext_inreg GR32:$src, i8), - (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit))>, + (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>, Requires<[In64BitMode]>; def : Pat<(sext_inreg GR16:$src, i8), - (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)))>, + (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>, Requires<[In64BitMode]>; // trunc patterns def : Pat<(i32 (trunc GR64:$src)), - (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)>; + (EXTRACT_SUBREG GR64:$src, sub_32bit)>; def : Pat<(i16 (trunc GR64:$src)), - (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)>; + (EXTRACT_SUBREG GR64:$src, sub_16bit)>; def : Pat<(i8 (trunc GR64:$src)), - (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)>; + (EXTRACT_SUBREG GR64:$src, sub_8bit)>; def : Pat<(i8 (trunc GR32:$src)), - (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit)>, + (EXTRACT_SUBREG GR32:$src, sub_8bit)>, Requires<[In64BitMode]>; def : Pat<(i8 (trunc GR16:$src)), - (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)>, + (EXTRACT_SUBREG GR16:$src, sub_8bit)>, Requires<[In64BitMode]>; // h-register tricks. 
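The "h-register tricks" that follow all fold one scalar idiom. A hedged model:

    #include <cstdint>
    // Byte 1 of a GPR is addressable directly as AH/BH/CH/DH (sub_8bit_hi),
    // so (x >> 8) & 0xff becomes a single MOVZX from the high-8 subregister
    // of an ABCD-class register instead of a shift plus a mask.
    uint32_t high_byte(uint32_t x) { return (x >> 8) & 0xff; }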
@@ -2079,67 +2116,67 @@ def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)), (i64 0), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)), - x86_subreg_8bit_hi)), - x86_subreg_32bit)>; + sub_8bit_hi)), + sub_32bit)>; def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi)), - x86_subreg_16bit)>, + sub_8bit_hi)), + sub_16bit)>, Requires<[In64BitMode]>; def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))), (SUBREG_TO_REG (i64 0), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi)), - x86_subreg_32bit)>; + sub_8bit_hi)), + sub_32bit)>; def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))), (SUBREG_TO_REG (i64 0), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi)), - x86_subreg_32bit)>; + sub_8bit_hi)), + sub_32bit)>; // h-register extract and store. 
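Likewise for the extract-and-store patterns after this hunk: the scalar shape is a truncating store of byte 1, which MOV8mr_NOREX can take straight from the high-8 subregister. A hedged sketch:

    #include <cstdint>
    void store_high_byte(uint8_t *p, uint32_t x) {
      *p = (uint8_t)(x >> 8);   // one byte store from an AH-style register
    }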
def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)), - x86_subreg_8bit_hi))>; + sub_8bit_hi))>; def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In64BitMode]>; // (shl x, 1) ==> (add x, x) diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index a21bfb9..34e12ca 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -744,17 +744,17 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, case X86::MOVZX32rr8: case X86::MOVSX64rr8: case X86::MOVZX64rr8: - SubIdx = 1; + SubIdx = X86::sub_8bit; break; case X86::MOVSX32rr16: case X86::MOVZX32rr16: case X86::MOVSX64rr16: case X86::MOVZX64rr16: - SubIdx = 3; + SubIdx = X86::sub_16bit; break; case X86::MOVSX64rr32: case X86::MOVZX64rr32: - SubIdx = 4; + SubIdx = X86::sub_32bit; break; } return true; @@ -1065,7 +1065,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, const TargetRegisterInfo *TRI) const { - DebugLoc DL = MBB.findDebugLoc(I); + DebugLoc DL = Orig->getDebugLoc(); if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { DestReg = TRI->getSubReg(DestReg, SubIdx); @@ -1154,7 +1154,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg) .addReg(leaInReg) .addReg(Src, getKillRegState(isKill)) - .addImm(X86::SUBREG_16BIT); + .addImm(X86::sub_16bit); MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg); @@ -1198,7 +1198,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2) .addReg(leaInReg2) .addReg(Src2, getKillRegState(isKill2)) - .addImm(X86::SUBREG_16BIT); + .addImm(X86::sub_16bit); addRegReg(MIB, leaInReg, true, leaInReg2, true); } if (LV && isKill2 && InsMI2) @@ -1212,7 +1212,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)) .addReg(leaOutReg, RegState::Kill) - .addImm(X86::SUBREG_16BIT); + .addImm(X86::sub_16bit); if (LV) { // Update live variables @@ -1901,8 +1901,8 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL = MBB.findDebugLoc(MI); + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { // Determine if DstRC and SrcRC have a common superclass in common. 
const TargetRegisterClass *CommonRC = DestRC; @@ -1993,12 +1993,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, if (SrcReg != X86::EFLAGS) return false; if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { - BuildMI(MBB, MI, DL, get(X86::PUSHFQ64)); + BuildMI(MBB, MI, DL, get(X86::PUSHF64)); BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); return true; } else if (DestRC == &X86::GR32RegClass || DestRC == &X86::GR32_NOSPRegClass) { - BuildMI(MBB, MI, DL, get(X86::PUSHFD)); + BuildMI(MBB, MI, DL, get(X86::PUSHF32)); BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); return true; } @@ -2007,12 +2007,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, return false; if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg); - BuildMI(MBB, MI, DL, get(X86::POPFQ)); + BuildMI(MBB, MI, DL, get(X86::POPF64)); return true; } else if (SrcRC == &X86::GR32RegClass || DestRC == &X86::GR32_NOSPRegClass) { BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg); - BuildMI(MBB, MI, DL, get(X86::POPFD)); + BuildMI(MBB, MI, DL, get(X86::POPF32)); return true; } } @@ -2133,7 +2133,8 @@ static unsigned getStoreRegOpcode(unsigned SrcReg, void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); @@ -2230,7 +2231,8 @@ static unsigned getLoadRegOpcode(unsigned DestReg, void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC) const{ + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); @@ -2256,7 +2258,8 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const { + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -2284,7 +2287,8 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, CalleeFrameSize += SlotSize; BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill); } else { - storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass); + storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass, + &RI); } } @@ -2294,7 +2298,8 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const { + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -2314,7 +2319,7 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (RegClass != &X86::VR128RegClass && !isWin64) { BuildMI(MBB, MI, DL, get(Opc), Reg); } else { - loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass); + loadRegFromStackSlot(MBB, MI, Reg, 
CSI[i].getFrameIdx(), RegClass, &RI); } } return true; @@ -2478,9 +2483,9 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, unsigned DstReg = NewMI->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(DstReg)) NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, - 4/*x86_subreg_32bit*/)); + X86::sub_32bit)); else - NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/); + NewMI->getOperand(0).setSubReg(X86::sub_32bit); } return NewMI; } @@ -2526,9 +2531,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, switch (MI->getOpcode()) { default: return NULL; case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break; - case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break; - case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break; - case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break; + case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break; + case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break; + case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break; } // Check if it's safe to fold the load. If the size of the object is // narrower than the load width, then it's not. @@ -2595,9 +2600,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, switch (MI->getOpcode()) { default: return NULL; case X86::TEST8rr: NewOpc = X86::CMP8ri; break; - case X86::TEST16rr: NewOpc = X86::CMP16ri; break; - case X86::TEST32rr: NewOpc = X86::CMP32ri; break; - case X86::TEST64rr: NewOpc = X86::CMP64ri32; break; + case X86::TEST16rr: NewOpc = X86::CMP16ri8; break; + case X86::TEST32rr: NewOpc = X86::CMP32ri8; break; + case X86::TEST64rr: NewOpc = X86::CMP64ri8; break; } // Change to CMPXXri r, 0 first. MI->setDesc(get(NewOpc)); @@ -2805,16 +2810,22 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, switch (DataMI->getOpcode()) { default: break; case X86::CMP64ri32: + case X86::CMP64ri8: case X86::CMP32ri: + case X86::CMP32ri8: case X86::CMP16ri: + case X86::CMP16ri8: case X86::CMP8ri: { MachineOperand &MO0 = DataMI->getOperand(0); MachineOperand &MO1 = DataMI->getOperand(1); if (MO1.getImm() == 0) { switch (DataMI->getOpcode()) { default: break; + case X86::CMP64ri8: case X86::CMP64ri32: NewOpc = X86::TEST64rr; break; + case X86::CMP32ri8: case X86::CMP32ri: NewOpc = X86::TEST32rr; break; + case X86::CMP16ri8: case X86::CMP16ri: NewOpc = X86::TEST16rr; break; case X86::CMP8ri: NewOpc = X86::TEST8rr; break; } diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index df99c7f..62d7c74 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -590,11 +590,13 @@ public: MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, SmallVectorImpl<MachineOperand> &Addr, @@ -606,7 +608,8 @@ public: virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromAddr(MachineFunction &MF, 
unsigned DestReg, SmallVectorImpl<MachineOperand> &Addr, @@ -617,11 +620,13 @@ public: virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index a2754ea..0d59c42 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -195,15 +195,15 @@ def ptr_rc_nosp : PointerLikeRegClass<1>; // def X86MemAsmOperand : AsmOperandClass { let Name = "Mem"; - let SuperClass = ?; -} -def X86AbsMemAsmOperand : AsmOperandClass { - let Name = "AbsMem"; - let SuperClass = X86MemAsmOperand; + let SuperClasses = []; } def X86NoSegMemAsmOperand : AsmOperandClass { let Name = "NoSegMem"; - let SuperClass = X86MemAsmOperand; + let SuperClasses = [X86MemAsmOperand]; +} +def X86AbsMemAsmOperand : AsmOperandClass { + let Name = "AbsMem"; + let SuperClasses = [X86NoSegMemAsmOperand]; } class X86MemOperand<string printMethod> : Operand<iPTR> { let PrintMethod = printMethod; @@ -270,19 +270,49 @@ def SSECC : Operand<i8> { let PrintMethod = "printSSECC"; } -def ImmSExt8AsmOperand : AsmOperandClass { - let Name = "ImmSExt8"; - let SuperClass = ImmAsmOperand; +class ImmSExtAsmOperandClass : AsmOperandClass { + let SuperClasses = [ImmAsmOperand]; + let RenderMethod = "addImmOperands"; +} + +// Sign-extended immediate classes. We don't need to define the full lattice +// here because there is no instruction with an ambiguity between ImmSExti64i32 +// and ImmSExti32i8. +// +// The strange ranges come from the fact that the assembler always works with +// 64-bit immediates, but for a 16-bit target value we want to accept both "-1" +// (which will be a -1ULL), and "0xFF" (-1 in 16-bits). + +// [0, 0x7FFFFFFF] | [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF] +def ImmSExti64i32AsmOperand : ImmSExtAsmOperandClass { + let Name = "ImmSExti64i32"; +} + +// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] +def ImmSExti16i8AsmOperand : ImmSExtAsmOperandClass { + let Name = "ImmSExti16i8"; + let SuperClasses = [ImmSExti64i32AsmOperand]; +} + +// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] +def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass { + let Name = "ImmSExti32i8"; +} + +// [0, 0x0000007F] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] +def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass { + let Name = "ImmSExti64i8"; + let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand, ImmSExti64i32AsmOperand]; } // A couple of more descriptive operand definitions. // 16-bits but only 8 bits are significant. def i16i8imm : Operand<i16> { - let ParserMatchClass = ImmSExt8AsmOperand; + let ParserMatchClass = ImmSExti16i8AsmOperand; } // 32-bits but only 8 bits are significant. 
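The range unions documented in the comments above have a compact closed form. A hedged sketch of the acceptance test the ImmSExti16i8 class describes (the other classes differ only in which widths they sign-extend between; the function name is illustrative):

    #include <cstdint>
    // Accept a 64-bit assembler value as an 8-bit immediate destined for a
    // 16-bit operand: small positives, negative 16-bit values written
    // unsigned (0xFF80..0xFFFF), and full-width negatives all qualify.
    bool fitsSExti16i8(uint64_t v) {
      return v <= 0x7FULL ||
             (v >= 0xFF80ULL && v <= 0xFFFFULL) ||
             v >= 0xFFFFFFFFFFFFFF80ULL;
    }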
def i32i8imm : Operand<i32> { - let ParserMatchClass = ImmSExt8AsmOperand; + let ParserMatchClass = ImmSExti32i8AsmOperand; } //===----------------------------------------------------------------------===// @@ -542,8 +572,10 @@ let neverHasSideEffects = 1 in { } // Trap -def INT3 : I<0xcc, RawFrm, (outs), (ins), "int\t3", []>; -def INT : I<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>; +def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; +def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", []>; +// FIXME: need to make sure that "int $3" matches int3 +def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>; def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize; def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>; @@ -693,6 +725,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in def TCRETURNri : I<0, Pseudo, (outs), (ins GR32_TC:$dst, i32imm:$offset, variable_ops), "#TC_RETURN $dst $offset", []>; + let mayLoad = 1 in def TCRETURNmi : I<0, Pseudo, (outs), (ins i32mem_TC:$dst, i32imm:$offset, variable_ops), "#TC_RETURN $dst $offset", []>; @@ -706,8 +739,16 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops), "jmp{l}\t{*}$dst # TAILCALL", []>; + let mayLoad = 1 in def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops), "jmp{l}\t{*}$dst # TAILCALL", []>; + + // FIXME: This is a hack so that MCInst lowering can preserve the TAILCALL + // marker on instructions, while still being able to relax. + let isCodeGenOnly = 1 in { + def TAILJMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), + "jmp\t$dst # TAILCALL", []>; + } } //===----------------------------------------------------------------------===// @@ -719,10 +760,12 @@ def LEAVE : I<0xC9, RawFrm, def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS; +let mayLoad = 1 in def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS; def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS; +let mayLoad = 1 in def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS; @@ -762,12 +805,14 @@ def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), } let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in { -def POPF : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize; -def POPFD : I<0x9D, RawFrm, (outs), (ins), "popf{l}", []>; +def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize; +def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>, + Requires<[In32BitMode]>; } let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in { -def PUSHF : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize; -def PUSHFD : I<0x9C, RawFrm, (outs), (ins), "pushf{l}", []>; +def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize; +def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>, + Requires<[In32BitMode]>; } let isTwoAddress = 1 in // GR32 = bswap GR32 @@ -867,7 +912,7 @@ def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, let Defs = [RAX, RCX, RDX] in def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB; -let isBarrier = 1, hasCtrlDep = 1 in { +let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in { def TRAP : 
I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB; } @@ -966,36 +1011,47 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(store (i32 imm:$src), addr:$dst)]>; -def MOV8o8a : Ii8 <0xA0, RawFrm, (outs), (ins offset8:$src), +/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a +/// 32-bit offset from the PC. These are only valid in x86-32 mode. +def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src), "mov{b}\t{$src, %al|%al, $src}", []>; -def MOV16o16a : Ii16 <0xA1, RawFrm, (outs), (ins offset16:$src), +def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src), "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize; def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src), "mov{l}\t{$src, %eax|%eax, $src}", []>; - -def MOV8ao8 : Ii8 <0xA2, RawFrm, (outs offset8:$dst), (ins), +def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins), "mov{b}\t{%al, $dst|$dst, %al}", []>; -def MOV16ao16 : Ii16 <0xA3, RawFrm, (outs offset16:$dst), (ins), +def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins), "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize; def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), "mov{l}\t{%eax, $dst|$dst, %eax}", []>; - + // Moves to and from segment registers def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>; + "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>; def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>; + "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>; def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>; + "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>; def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>; + "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>; +let isCodeGenOnly = 1 in { def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src), "mov{b}\t{$src, $dst|$dst, $src}", []>; def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", []>; +} let canFoldAsLoad = 1, isReMaterializable = 1 in { def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src), @@ -1059,10 +1115,10 @@ def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; // Moves to and from control registers -def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG_32:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; -def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_32:$dst), (ins GR32:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; +def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; +def MOV32cr : I<0x22, MRMSrcReg, (outs 
CONTROL_REG:$dst), (ins GR32:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; //===----------------------------------------------------------------------===// // Fixed-Register Multiplication and Division Instructions... @@ -1746,6 +1802,7 @@ def AND32rr : I<0x21, MRMDestReg, // AND instructions with the destination register in REG and the source register // in R/M. Included for the disassembler. +let isCodeGenOnly = 1 in { def AND8rr_REV : I<0x22, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "and{b}\t{$src2, $dst|$dst, $src2}", []>; def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst), @@ -1754,6 +1811,7 @@ def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst), def AND32rr_REV : I<0x23, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "and{l}\t{$src2, $dst|$dst, $src2}", []>; +} def AND8rm : I<0x22, MRMSrcMem, (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2), @@ -1872,6 +1930,7 @@ def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst), // OR instructions with the destination register in REG and the source register // in R/M. Included for the disassembler. +let isCodeGenOnly = 1 in { def OR8rr_REV : I<0x0A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "or{b}\t{$src2, $dst|$dst, $src2}", []>; def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst), @@ -1880,6 +1939,7 @@ def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst), def OR32rr_REV : I<0x0B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "or{l}\t{$src2, $dst|$dst, $src2}", []>; +} def OR8rm : I<0x0A, MRMSrcMem, (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2), @@ -1988,6 +2048,7 @@ let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y // XOR instructions with the destination register in REG and the source register // in R/M. Included for the disassembler. 
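The _REV definitions gated behind isCodeGenOnly throughout this hunk exist because every reg-reg ALU operation has two equivalent encodings. A hedged sketch of the distinction for XOR, using the standard x86 register numbers eax = 0 and ecx = 1:

    #include <cstdint>
    // 0x31 /r encodes "xor r/m32, r32" (destination in ModRM.rm; what the
    // compiler emits); 0x33 /r encodes "xor r32, r/m32" (destination in
    // ModRM.reg). The _REV defs let the disassembler round-trip the latter.
    uint8_t modrm(uint8_t mod, uint8_t reg, uint8_t rm) {
      return (uint8_t)((mod << 6) | ((reg & 7) << 3) | (rm & 7));
    }
    // xorl %eax, %ecx as 0x31, modrm(3, 0, 1) or as 0x33, modrm(3, 1, 0).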
+let isCodeGenOnly = 1 in { def XOR8rr_REV : I<0x32, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "xor{b}\t{$src2, $dst|$dst, $src2}", []>; def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst), @@ -1996,6 +2057,7 @@ def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst), def XOR32rr_REV : I<0x33, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "xor{l}\t{$src2, $dst|$dst, $src2}", []>; +} def XOR8rm : I<0x32, MRMSrcMem, (outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2), @@ -2793,6 +2855,7 @@ def ADC32rr : I<0x11, MRMDestReg, (outs GR32:$dst), [(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>; } +let isCodeGenOnly = 1 in { def ADC8rr_REV : I<0x12, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "adc{b}\t{$src2, $dst|$dst, $src2}", []>; def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst), @@ -2801,6 +2864,7 @@ def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst), def ADC32rr_REV : I<0x13, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "adc{l}\t{$src2, $dst|$dst, $src2}", []>; +} def ADC8rm : I<0x12, MRMSrcMem , (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2), @@ -2888,6 +2952,7 @@ def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2), [(set GR32:$dst, EFLAGS, (X86sub_flag GR32:$src1, GR32:$src2))]>; +let isCodeGenOnly = 1 in { def SUB8rr_REV : I<0x2A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "sub{b}\t{$src2, $dst|$dst, $src2}", []>; def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst), @@ -2896,6 +2961,7 @@ def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst), def SUB32rr_REV : I<0x2B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "sub{l}\t{$src2, $dst|$dst, $src2}", []>; +} // Register-Memory Subtraction def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst), @@ -3039,6 +3105,7 @@ let isTwoAddress = 0 in { "sbb{l}\t{$src, %eax|%eax, $src}", []>; } +let isCodeGenOnly = 1 in { def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "sbb{b}\t{$src2, $dst|$dst, $src2}", []>; def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst), @@ -3047,6 +3114,7 @@ def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst), def SBB32rr_REV : I<0x1B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "sbb{l}\t{$src2, $dst|$dst, $src2}", []>; +} def SBB8rm : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2), "sbb{b}\t{$src2, $dst|$dst, $src2}", @@ -3864,12 +3932,14 @@ def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB; +let mayLoad = 1, mayStore = 1 in { def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB; def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB; +} def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB; @@ -3878,12 +3948,14 @@ def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB; +let mayLoad = 1, mayStore = 1 in { def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB; def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins 
i16mem:$dst, GR16:$src), "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB; +} let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), @@ -3891,7 +3963,7 @@ def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), // Optimized codegen when the non-memory output is not used. // FIXME: Use normal add / sub instructions and add lock prefix dynamically. -let Defs = [EFLAGS] in { +let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in { def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), "lock\n\t" "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; @@ -4453,7 +4525,7 @@ def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>; // Except for i16 -> i32 since isel expect i16 ops to be promoted to i32. def : Pat<(i32 (anyext GR16:$src)), - (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>; + (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>; //===----------------------------------------------------------------------===// @@ -4473,81 +4545,81 @@ def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst), // r & (2^16-1) ==> movz def : Pat<(and GR32:$src1, 0xffff), - (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit))>; + (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>; // r & (2^8-1) ==> movz def : Pat<(and GR32:$src1, 0xff), (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1, GR32_ABCD)), - x86_subreg_8bit))>, + sub_8bit))>, Requires<[In32BitMode]>; // r & (2^8-1) ==> movz def : Pat<(and GR16:$src1, 0xff), (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), - x86_subreg_8bit))>, + sub_8bit))>, Requires<[In32BitMode]>; // sext_inreg patterns def : Pat<(sext_inreg GR32:$src, i16), - (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>; + (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>; def : Pat<(sext_inreg GR32:$src, i8), (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit))>, + sub_8bit))>, Requires<[In32BitMode]>; def : Pat<(sext_inreg GR16:$src, i8), (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit))>, + sub_8bit))>, Requires<[In32BitMode]>; // trunc patterns def : Pat<(i16 (trunc GR32:$src)), - (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)>; + (EXTRACT_SUBREG GR32:$src, sub_16bit)>; def : Pat<(i8 (trunc GR32:$src)), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit)>, + sub_8bit)>, Requires<[In32BitMode]>; def : Pat<(i8 (trunc GR16:$src)), (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit)>, + sub_8bit)>, Requires<[In32BitMode]>; // h-register tricks def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))), (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi)>, + sub_8bit_hi)>, Requires<[In32BitMode]>; def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit_hi)>, + sub_8bit_hi)>, Requires<[In32BitMode]>; def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi)), - x86_subreg_16bit)>, + sub_8bit_hi)), + sub_16bit)>, Requires<[In32BitMode]>; def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - 
x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In32BitMode]>; def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In32BitMode]>; def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In32BitMode]>; def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In32BitMode]>; // (shl x, 1) ==> (add x, x) diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 744af50..0952fc8 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -117,9 +117,6 @@ def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; -def MMX_MOVQ64gmr : MMXRI<0x7F, MRMDestMem, (outs), - (ins i64mem:$dst, VR64:$src), - "movq\t{$src, $dst|$dst, $src}", []>; let neverHasSideEffects = 1 in def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), @@ -133,10 +130,10 @@ let neverHasSideEffects = 1 in def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; -def MMX_MOVD64rrv164 : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), - "movd\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, - (v1i64 (scalar_to_vector GR64:$src)))]>; +def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR64:$dst, + (v1i64 (scalar_to_vector GR64:$src)))]>; let neverHasSideEffects = 1 in def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 2129580..5580ba7 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -117,17 +117,17 @@ def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast<LoadSDNode>(N)->getAlignment() >= 16; }]>; -def alignedloadfsf32 : PatFrag<(ops node:$ptr), +def alignedloadfsf32 : PatFrag<(ops node:$ptr), (f32 (alignedload node:$ptr))>; -def alignedloadfsf64 : PatFrag<(ops node:$ptr), +def alignedloadfsf64 : PatFrag<(ops node:$ptr), (f64 (alignedload node:$ptr))>; -def alignedloadv4f32 : PatFrag<(ops node:$ptr), +def alignedloadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (alignedload node:$ptr))>; -def alignedloadv2f64 : PatFrag<(ops node:$ptr), +def alignedloadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (alignedload node:$ptr))>; -def alignedloadv4i32 : PatFrag<(ops node:$ptr), +def alignedloadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (alignedload node:$ptr))>; -def alignedloadv2i64 : PatFrag<(ops node:$ptr), +def alignedloadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (alignedload node:$ptr))>; // Like 'load', but uses special alignment checks suitable for use in @@ -387,11 +387,11 @@ def MOVSSrr : SSI<0x10, MRMSrcReg, let AddedComplexity = 15 in def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), (MOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), x86_subreg_ss))>; + (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; // Implicitly promote a 32-bit scalar to a vector. 
def : Pat<(v4f32 (scalar_to_vector FR32:$src)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, x86_subreg_ss)>; + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; // Loading from memory automatically zeroing upper bits. let canFoldAsLoad = 1, isReMaterializable = 1 in @@ -403,11 +403,11 @@ def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), // with SUBREG_TO_REG. let AddedComplexity = 20 in { def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; } // Store scalar value to memory. @@ -419,7 +419,7 @@ def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), addr:$dst), (MOVSSmr addr:$dst, - (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>; + (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; // Conversion instructions def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src), @@ -449,7 +449,7 @@ def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), [(set GR32:$dst, (int_x86_sse_cvtss2si (load addr:$src)))]>; -// Match intrinisics which expect MM and XMM operand(s). +// Match intrinsics which expect MM and XMM operand(s). def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "cvtps2pi\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>; @@ -509,6 +509,17 @@ let mayLoad = 1 in def CMPSSrm : SSIi8<0xC2, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc), "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>; + + // Accept explicit immediate argument form instead of comparison code. +let isAsmParserOnly = 1 in { + def CMPSSrr_alt : SSIi8<0xC2, MRMSrcReg, + (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2), + "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>; +let mayLoad = 1 in + def CMPSSrm_alt : SSIi8<0xC2, MRMSrcMem, + (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2), + "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>; +} } let Defs = [EFLAGS] in { @@ -518,25 +529,25 @@ def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2), def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2), "ucomiss\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86cmp FR32:$src1, (loadf32 addr:$src2)))]>; - + def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "comiss\t{$src2, $src1|$src1, $src2}", []>; def COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "comiss\t{$src2, $src1|$src1, $src2}", []>; - + } // Defs = [EFLAGS] // Aliases to match intrinsics which expect XMM operand(s). 
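The isAsmParserOnly *_alt definitions above (and the parallel cmpsd/cmpps/cmppd forms later in this patch) let the assembler accept a raw 8-bit immediate in place of the condition-code mnemonic suffix. As a standalone illustration (plain C++, not LLVM code; SSECC here is my own table of the usual SSE predicate encodings 0-7), the correspondence the parser now accepts is:

#include <cstdio>

// SSE comparison predicates: immediate 0..7 selects the condition that the
// cmp${cc}ss/sd/ps/pd mnemonic suffix would otherwise spell out.
static const char *SSECC[] = {"eq", "lt", "le", "unord",
                              "neq", "nlt", "nle", "ord"};

int main() {
  for (int imm = 0; imm < 8; ++imm)
    std::printf("cmpss $%d, %%xmm1, %%xmm0  ==  cmp%sss %%xmm1, %%xmm0\n",
                imm, SSECC[imm]);
  return 0;
}

So "cmpss $4, %xmm1, %xmm0" and "cmpneqss %xmm1, %xmm0" assemble to the same bytes; the _alt forms exist purely so the first spelling parses.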
let Constraints = "$src1 = $dst" in { def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), "cmp${cc}ss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cmp_ss + [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1, VR128:$src, imm:$cc))]>; def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), + (outs VR128:$dst), (ins VR128:$src1, f32mem:$src, SSECC:$cc), "cmp${cc}ss\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1, @@ -1009,6 +1020,16 @@ let Constraints = "$src1 = $dst" in { "cmp${cc}ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1, (memop addr:$src), imm:$cc))]>; + + // Accept explicit immediate argument form instead of comparison code. +let isAsmParserOnly = 1 in { + def CMPPSrri_alt : PSIi8<0xC2, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2), + "cmpps\t{$src2, $src, $dst|$dst, $src, $src}", []>; + def CMPPSrmi_alt : PSIi8<0xC2, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2), + "cmpps\t{$src2, $src, $dst|$dst, $src, $src}", []>; +} } def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; @@ -1102,7 +1123,8 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), } // Load, store, and memory fence -def SFENCE : PSI<0xAE, MRM7r, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>; +def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>, + TB, Requires<[HasSSE1]>; // MXCSR register def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src), @@ -1130,7 +1152,7 @@ def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>; def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>; def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>; + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; //===---------------------------------------------------------------------===// // SSE2 Instructions @@ -1152,11 +1174,11 @@ def MOVSDrr : SDI<0x10, MRMSrcReg, let AddedComplexity = 15 in def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), (MOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), x86_subreg_sd))>; + (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; // Implicitly promote a 64-bit scalar to a vector. def : Pat<(v2f64 (scalar_to_vector FR64:$src)), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, x86_subreg_sd)>; + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; // Loading from memory automatically zeroing upper bits. let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in @@ -1168,15 +1190,15 @@ def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), // with SUBREG_TO_REG. 
let AddedComplexity = 20 in { def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; def : Pat<(v2f64 (X86vzload addr:$src)), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; } // Store scalar value to memory. @@ -1188,7 +1210,7 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst), (MOVSDmr addr:$dst, - (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>; + (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; // Conversion instructions def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src), @@ -1255,7 +1277,7 @@ def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src), [(set GR32:$dst, (int_x86_sse2_cvtsd2si (load addr:$src)))]>; -// Match intrinisics which expect MM and XMM operand(s). +// Match intrinsics which expect MM and XMM operand(s). def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "cvtpd2pi\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>; @@ -1297,6 +1319,17 @@ let mayLoad = 1 in def CMPSDrm : SDIi8<0xC2, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc), "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>; + + // Accept explicit immediate argument form instead of comparison code. +let isAsmParserOnly = 1 in { + def CMPSDrr_alt : SDIi8<0xC2, MRMSrcReg, + (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2), + "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; +let mayLoad = 1 in + def CMPSDrm_alt : SDIi8<0xC2, MRMSrcMem, + (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2), + "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; +} } let Defs = [EFLAGS] in { @@ -1311,13 +1344,13 @@ def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2), // Aliases to match intrinsics which expect XMM operand(s). 
let Constraints = "$src1 = $dst" in { def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), "cmp${cc}sd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, VR128:$src, imm:$cc))]>; def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), + (outs VR128:$dst), (ins VR128:$src1, f64mem:$src, SSECC:$cc), "cmp${cc}sd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, @@ -1655,7 +1688,7 @@ def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, + [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))]>, XS, Requires<[HasSSE2]>; def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), @@ -1890,6 +1923,16 @@ let Constraints = "$src1 = $dst" in { "cmp${cc}pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1, (memop addr:$src), imm:$cc))]>; + + // Accept explicit immediate argument form instead of comparison code. +let isAsmParserOnly = 1 in { + def CMPPDrri_alt : PDIi8<0xC2, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2), + "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; + def CMPPDrmi_alt : PDIi8<0xC2, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2), + "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; +} } def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; @@ -1980,24 +2023,24 @@ let Constraints = "$src1 = $dst" in { multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, bit Commutable = 0> { - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), + def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> { let isCommutable = Commutable; } - def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), + def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId VR128:$src1, - (bitconvert (memopv2i64 + (bitconvert (memopv2i64 addr:$src2))))]>; } multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, string OpcodeStr, Intrinsic IntId, Intrinsic IntId2> { - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), + def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>; @@ -2006,7 +2049,7 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>; - def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), + def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>; @@ -2015,13 +2058,13 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, /// PDI_binop_rm - Simple SSE2 binary operator. 
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, bit Commutable = 0> { - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), + def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> { let isCommutable = Commutable; } - def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), + def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (OpVT (OpNode VR128:$src1, @@ -2416,6 +2459,10 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins), def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; +// Pause. This "instruction" is encoded as "rep; nop", so even though it +// was introduced with SSE2, it's backward compatible. +def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP; + //TODO: custom lower this so as to never even generate the noop def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 0)), (NOOP)>; @@ -2462,7 +2509,7 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), (iPTR 0))), addr:$dst)]>; def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), - (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>; + (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", @@ -2903,7 +2950,7 @@ defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw", defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw", int_x86_ssse3_pmul_hr_sw, int_x86_ssse3_pmul_hr_sw_128, 1>; - + defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb", int_x86_ssse3_pshuf_b, int_x86_ssse3_pshuf_b_128>; @@ -3042,10 +3089,10 @@ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), (MOVSSrr (v4f32 (V_SET0PS)), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>; + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), (MOVSSrr (v4i32 (V_SET0PI)), - (EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>; + (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; } // Splat v2f64 / v2i64 @@ -3181,17 +3228,17 @@ let AddedComplexity = 15 in { // Setting the lowest element in the vector. 
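The PAUSE definition introduced above deserves a concrete note: because it encodes as REP (0xF3) followed by NOP (0x90), a pre-SSE2 processor simply executes a NOP, so spin loops can emit it unconditionally. A minimal spin-wait sketch (assuming an x86 compiler that provides _mm_pause via <immintrin.h>; spin_until is a hypothetical helper, not part of this patch):

#include <atomic>
#include <immintrin.h>

// Spin until another thread publishes `ready`. _mm_pause() emits f3 90
// ("rep; nop"), hinting the core to relax while the load loops.
void spin_until(std::atomic<bool> &ready) {
  while (!ready.load(std::memory_order_acquire))
    _mm_pause();
}

int main() {
  std::atomic<bool> ready(true);
  spin_until(ready);  // already set, returns immediately
  return 0;
}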
def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), (MOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), x86_subreg_ss))>; + (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), (MOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), x86_subreg_sd))>; + (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>, + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>, Requires<[HasSSE2]>; def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>, + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>, Requires<[HasSSE2]>; } @@ -3464,14 +3511,14 @@ let Constraints = "$src1 = $dst" in { let Constraints = "$src1 = $dst" in { multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, bit Commutable = 0> { - def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), + def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>, OpSize { let isCommutable = Commutable; } - def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), + def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (OpNode VR128:$src1, @@ -3949,15 +3996,15 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, VR128:$src3, i8imm:$src5), "#PCMPESTRM128rr PSEUDO!", - [(set VR128:$dst, - (int_x86_sse42_pcmpestrm128 + [(set VR128:$dst, + (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize; def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), "#PCMPESTRM128rm PSEUDO!", - [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 - VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>, + [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 + VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>, OpSize; } @@ -3972,7 +4019,7 @@ def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), let Defs = [ECX, EFLAGS] in { multiclass SS42AI_pcmpistri<Intrinsic IntId128> { - def rr : SS42AI<0x63, MRMSrcReg, (outs), + def rr : SS42AI<0x63, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)), @@ -4003,7 +4050,7 @@ let Uses = [EAX, EDX] in { def rm : SS42AI<0x61, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", - [(set ECX, + [(set ECX, (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)), (implicit EFLAGS)]>, OpSize; } @@ -4081,16 +4128,15 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), OpSize; def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, i32i8imm:$src2), + (ins VR128:$src1, i8imm:$src2), "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, OpSize; def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), - (ins i128mem:$src1, i32i8imm:$src2), + (ins i128mem:$src1, i8imm:$src2), 
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)), imm:$src2))]>, OpSize; - diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index a3e04b0..98975ea 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -37,7 +37,6 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -145,6 +144,19 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { case X86::XMM7: case X86::XMM15: case X86::MM7: return 7; + case X86::ES: + return 0; + case X86::CS: + return 1; + case X86::SS: + return 2; + case X86::DS: + return 3; + case X86::FS: + return 4; + case X86::GS: + return 5; + default: assert(isVirtualRegister(RegNo) && "Unknown physical register!"); llvm_unreachable("Register allocator hasn't allocated reg correctly yet!"); @@ -158,8 +170,7 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, unsigned SubIdx) const { switch (SubIdx) { default: return 0; - case 1: - // 8-bit + case X86::sub_8bit: if (B == &X86::GR8RegClass) { if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8) return A; @@ -191,12 +202,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR16_NOREXRegClass; else if (A == &X86::GR16_ABCDRegClass) return &X86::GR16_ABCDRegClass; - } else if (B == &X86::FR32RegClass) { - return A; } break; - case 2: - // 8-bit hi + case X86::sub_8bit_hi: if (B == &X86::GR8_ABCD_HRegClass) { if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || A == &X86::GR64_NOREXRegClass || @@ -209,12 +217,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass || A == &X86::GR16_NOREXRegClass) return &X86::GR16_ABCDRegClass; - } else if (B == &X86::FR64RegClass) { - return A; } break; - case 3: - // 16-bit + case X86::sub_16bit: if (B == &X86::GR16RegClass) { if (A->getSize() == 4 || A->getSize() == 8) return A; @@ -238,12 +243,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR32_NOREXRegClass; else if (A == &X86::GR32_ABCDRegClass) return &X86::GR64_ABCDRegClass; - } else if (B == &X86::VR128RegClass) { - return A; } break; - case 4: - // 32-bit + case X86::sub_32bit: if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) { if (A->getSize() == 8) return A; @@ -261,6 +263,18 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR64_ABCDRegClass; } break; + case X86::sub_ss: + if (B == &X86::FR32RegClass) + return A; + break; + case X86::sub_sd: + if (B == &X86::FR64RegClass) + return A; + break; + case X86::sub_xmm: + if (B == &X86::VR128RegClass) + return A; + break; } return 0; } @@ -518,6 +532,30 @@ X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const { return Offset; } +static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { + if (is64Bit) { + if (isInt<8>(Imm)) + return X86::SUB64ri8; + return X86::SUB64ri32; + } else { + if (isInt<8>(Imm)) + return X86::SUB32ri8; + return X86::SUB32ri; + } +} + +static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) { + if (is64Bit) { + if (isInt<8>(Imm)) + return X86::ADD64ri8; + return X86::ADD64ri32; + } else { + if (isInt<8>(Imm)) + return X86::ADD32ri8; + return 
X86::ADD32ri; + } +} + void X86RegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { @@ -537,7 +575,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineInstr *New = 0; if (Old->getOpcode() == getCallFrameSetupOpcode()) { New = BuildMI(MF, Old->getDebugLoc(), - TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri), + TII.get(getSUBriOpcode(Is64Bit, Amount)), StackPtr) .addReg(StackPtr) .addImm(Amount); @@ -549,9 +587,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, Amount -= CalleeAmt; if (Amount) { - unsigned Opc = (Amount < 128) ? - (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) : - (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri); + unsigned Opc = getADDriOpcode(Is64Bit, Amount); New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr) .addReg(StackPtr) .addImm(Amount); @@ -571,9 +607,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // something off the stack pointer, add it back. We do this until we have // more advanced stack pointer tracking ability. if (uint64_t CalleeAmt = I->getOperand(1).getImm()) { - unsigned Opc = (CalleeAmt < 128) ? - (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) : - (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri); + unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt); MachineInstr *Old = I; MachineInstr *New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), @@ -691,13 +725,9 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const TargetInstrInfo &TII) { bool isSub = NumBytes < 0; uint64_t Offset = isSub ? -NumBytes : NumBytes; - unsigned Opc = isSub - ? ((Offset < 128) ? - (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) : - (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri)) - : ((Offset < 128) ? - (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) : - (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri)); + unsigned Opc = isSub ? + getSUBriOpcode(Is64Bit, Offset) : + getADDriOpcode(Is64Bit, Offset); uint64_t Chunk = (1LL << 31) - 1; DebugLoc DL = MBB.findDebugLoc(MBBI); @@ -899,7 +929,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && !needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. - !MFI->hasCalls() && // No calls. + !MFI->adjustsStack() && // No calls. !Subtarget->isTargetWin64()) { // Win64 has no Red Zone uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; @@ -917,7 +947,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { // size is bigger than the callers. if (TailCallReturnAddrDelta < 0) { MachineInstr *MI = - BuildMI(MBB, MBBI, DL, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri), + BuildMI(MBB, MBBI, DL, + TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)), StackPtr) .addReg(StackPtr) .addImm(-TailCallReturnAddrDelta); @@ -1307,7 +1338,7 @@ X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const { // Calculate amount of bytes used for return address storing int stackGrowth = (Is64Bit ? -8 : -4); - // Initial state of the frame pointer is esp+4. + // Initial state of the frame pointer is esp+stackGrowth. 
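The getSUBriOpcode/getADDriOpcode helpers defined above centralize a size optimization that the call sites previously open-coded as `(Amount < 128)` checks: the ri8 opcode forms encode the immediate in a single signed byte when it fits. A standalone sketch of the selection logic, with isInt8 standing in for llvm::isInt<8> and opcode names returned as strings (illustration only, not the LLVM code):

#include <cstdint>
#include <cstdio>

// Stand-in for llvm::isInt<8>: does the immediate fit in a signed byte?
static bool isInt8(int64_t x) { return x >= -128 && x <= 127; }

static const char *getSUBriOpcodeName(bool is64Bit, int64_t Imm) {
  if (is64Bit)
    return isInt8(Imm) ? "SUB64ri8" : "SUB64ri32";
  return isInt8(Imm) ? "SUB32ri8" : "SUB32ri";
}

int main() {
  std::printf("%s\n", getSUBriOpcodeName(true, 24));    // SUB64ri8: small frame
  std::printf("%s\n", getSUBriOpcodeName(true, 4096));  // SUB64ri32: large frame
  return 0;
}

Besides being shorter, a signed-range test also behaves sensibly for negative adjustments, which the old unsigned `< 128` comparison did not express.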
MachineLocation Dst(MachineLocation::VirtualFP); MachineLocation Src(StackPtr, stackGrowth); Moves.push_back(MachineMove(0, Dst, Src)); diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index ac96c4c..d0b82e2 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -30,16 +30,6 @@ namespace N86 { }; } -namespace X86 { - /// SubregIndex - The index of various sized subregister classes. Note that - /// these indices must be kept in sync with the class indices in the - /// X86RegisterInfo.td file. - enum SubregIndex { - SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4, - SUBREG_SS = 1, SUBREG_SD = 2, SUBREG_XMM = 3 - }; -} - /// DWARFFlavour - Flavour of dwarf regnumbers /// namespace DWARFFlavour { diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 49a6ca0..91cfaa9 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -18,6 +18,17 @@ // let Namespace = "X86" in { + // Subregister indices. + def sub_8bit : SubRegIndex; + def sub_8bit_hi : SubRegIndex; + def sub_16bit : SubRegIndex; + def sub_32bit : SubRegIndex; + + def sub_ss : SubRegIndex; + def sub_sd : SubRegIndex; + def sub_xmm : SubRegIndex; + + // In the register alias definitions below, we define which registers alias // which others. We only specify which registers the small registers alias, // because the register file generator is smart enough to figure out that @@ -57,17 +68,22 @@ let Namespace = "X86" in { def BH : Register<"bh">, DwarfRegNum<[3, 3, 3]>; // 16-bit registers + let SubRegIndices = [sub_8bit, sub_8bit_hi] in { def AX : RegisterWithSubRegs<"ax", [AL,AH]>, DwarfRegNum<[0, 0, 0]>; def DX : RegisterWithSubRegs<"dx", [DL,DH]>, DwarfRegNum<[1, 2, 2]>; def CX : RegisterWithSubRegs<"cx", [CL,CH]>, DwarfRegNum<[2, 1, 1]>; def BX : RegisterWithSubRegs<"bx", [BL,BH]>, DwarfRegNum<[3, 3, 3]>; + } + let SubRegIndices = [sub_8bit] in { def SI : RegisterWithSubRegs<"si", [SIL]>, DwarfRegNum<[4, 6, 6]>; def DI : RegisterWithSubRegs<"di", [DIL]>, DwarfRegNum<[5, 7, 7]>; def BP : RegisterWithSubRegs<"bp", [BPL]>, DwarfRegNum<[6, 4, 5]>; def SP : RegisterWithSubRegs<"sp", [SPL]>, DwarfRegNum<[7, 5, 4]>; + } def IP : Register<"ip">, DwarfRegNum<[16]>; // X86-64 only + let SubRegIndices = [sub_8bit] in { def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>; def R9W : RegisterWithSubRegs<"r9w", [R9B]>, DwarfRegNum<[9, -2, -2]>; def R10W : RegisterWithSubRegs<"r10w", [R10B]>, DwarfRegNum<[10, -2, -2]>; @@ -76,8 +92,9 @@ let Namespace = "X86" in { def R13W : RegisterWithSubRegs<"r13w", [R13B]>, DwarfRegNum<[13, -2, -2]>; def R14W : RegisterWithSubRegs<"r14w", [R14B]>, DwarfRegNum<[14, -2, -2]>; def R15W : RegisterWithSubRegs<"r15w", [R15B]>, DwarfRegNum<[15, -2, -2]>; - + } // 32-bit registers + let SubRegIndices = [sub_16bit] in { def EAX : RegisterWithSubRegs<"eax", [AX]>, DwarfRegNum<[0, 0, 0]>; def EDX : RegisterWithSubRegs<"edx", [DX]>, DwarfRegNum<[1, 2, 2]>; def ECX : RegisterWithSubRegs<"ecx", [CX]>, DwarfRegNum<[2, 1, 1]>; @@ -97,8 +114,10 @@ let Namespace = "X86" in { def R13D : RegisterWithSubRegs<"r13d", [R13W]>, DwarfRegNum<[13, -2, -2]>; def R14D : RegisterWithSubRegs<"r14d", [R14W]>, DwarfRegNum<[14, -2, -2]>; def R15D : RegisterWithSubRegs<"r15d", [R15W]>, DwarfRegNum<[15, -2, -2]>; + } // 64-bit registers, X86-64 only + let SubRegIndices = [sub_32bit] in { def RAX : RegisterWithSubRegs<"rax", [EAX]>, DwarfRegNum<[0, -2, -2]>; def RDX : 
RegisterWithSubRegs<"rdx", [EDX]>, DwarfRegNum<[1, -2, -2]>; def RCX : RegisterWithSubRegs<"rcx", [ECX]>, DwarfRegNum<[2, -2, -2]>; @@ -117,6 +136,7 @@ let Namespace = "X86" in { def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>; def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>; def RIP : RegisterWithSubRegs<"rip", [EIP]>, DwarfRegNum<[16, -2, -2]>; + } // MMX Registers. These are actually aliased to ST0 .. ST7 def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>; @@ -137,7 +157,9 @@ let Namespace = "X86" in { def FP5 : Register<"fp5">; def FP6 : Register<"fp6">; - // XMM Registers, used by the various SSE instruction set extensions + // XMM Registers, used by the various SSE instruction set extensions. + // The sub_ss and sub_sd subregs are the same registers with another regclass. + let CompositeIndices = [(sub_ss), (sub_sd)] in { def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>; def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>; def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>; @@ -156,8 +178,10 @@ let Namespace = "X86" in { def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>; def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>; def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>; + } // YMM Registers, used by AVX instructions + let SubRegIndices = [sub_xmm] in { def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>; def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>; def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>; @@ -174,6 +198,7 @@ let Namespace = "X86" in { def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegNum<[30, -2, -2]>; def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegNum<[31, -2, -2]>; def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegNum<[32, -2, -2]>; + } // Floating point stack registers def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>; @@ -207,106 +232,19 @@ let Namespace = "X86" in { def DR7 : Register<"dr7">; // Condition registers - def ECR0 : Register<"ecr0">; - def ECR1 : Register<"ecr1">; - def ECR2 : Register<"ecr2">; - def ECR3 : Register<"ecr3">; - def ECR4 : Register<"ecr4">; - def ECR5 : Register<"ecr5">; - def ECR6 : Register<"ecr6">; - def ECR7 : Register<"ecr7">; - - def RCR0 : Register<"rcr0">; - def RCR1 : Register<"rcr1">; - def RCR2 : Register<"rcr2">; - def RCR3 : Register<"rcr3">; - def RCR4 : Register<"rcr4">; - def RCR5 : Register<"rcr5">; - def RCR6 : Register<"rcr6">; - def RCR7 : Register<"rcr7">; - def RCR8 : Register<"rcr8">; + def CR0 : Register<"cr0">; + def CR1 : Register<"cr1">; + def CR2 : Register<"cr2">; + def CR3 : Register<"cr3">; + def CR4 : Register<"cr4">; + def CR5 : Register<"cr5">; + def CR6 : Register<"cr6">; + def CR7 : Register<"cr7">; + def CR8 : Register<"cr8">; } //===----------------------------------------------------------------------===// -// Subregister Set Definitions... now that we have all of the pieces, define the -// sub registers for each register. 
-// - -def x86_subreg_8bit : PatLeaf<(i32 1)>; -def x86_subreg_8bit_hi : PatLeaf<(i32 2)>; -def x86_subreg_16bit : PatLeaf<(i32 3)>; -def x86_subreg_32bit : PatLeaf<(i32 4)>; - -def x86_subreg_ss : PatLeaf<(i32 1)>; -def x86_subreg_sd : PatLeaf<(i32 2)>; -def x86_subreg_xmm : PatLeaf<(i32 3)>; - -def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI, - R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W], - [AL, CL, DL, BL, SPL, BPL, SIL, DIL, - R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>; - -def : SubRegSet<2, [AX, CX, DX, BX], - [AH, CH, DH, BH]>; - -def : SubRegSet<1, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, - R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D], - [AL, CL, DL, BL, SPL, BPL, SIL, DIL, - R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>; - -def : SubRegSet<2, [EAX, ECX, EDX, EBX], - [AH, CH, DH, BH]>; - -def : SubRegSet<3, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, - R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D], - [AX, CX, DX, BX, SP, BP, SI, DI, - R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>; - -def : SubRegSet<1, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, - R8, R9, R10, R11, R12, R13, R14, R15], - [AL, CL, DL, BL, SPL, BPL, SIL, DIL, - R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>; - -def : SubRegSet<2, [RAX, RCX, RDX, RBX], - [AH, CH, DH, BH]>; - -def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, - R8, R9, R10, R11, R12, R13, R14, R15], - [AX, CX, DX, BX, SP, BP, SI, DI, - R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>; - -def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, - R8, R9, R10, R11, R12, R13, R14, R15], - [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, - R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>; - -def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, - YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; - -def : SubRegSet<2, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, - YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; - -def : SubRegSet<3, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, - YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; - -def : SubRegSet<1, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15], - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; - -def : SubRegSet<2, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15], - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; - -//===----------------------------------------------------------------------===// // Register Class Definitions... now that we have all of the pieces, define the // top-level register classes. The order specified in the register list is // implicitly defined to be the register allocation order. 
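The deleted block above is the heart of this change: sub-register relationships used to be described by integer-keyed SubRegSet tables plus x86_subreg_* PatLeaf constants, and the same integer meant different things per register class (1 was the low byte of a GPR but the scalar-single view of an XMM register). The named SubRegIndex defs remove that ambiguity. A scoped-enum sketch (illustration only, not the LLVM API) of why distinct names are safer, and what the GPR indices denote bit-wise:

#include <cassert>
#include <cstdint>

// With plain integers, passing an XMM index where a GPR index was expected
// compiled silently; distinct named indices (modeled here as a scoped enum)
// make that mix-up a compile-time error.
enum class GPRSubReg { sub_8bit, sub_8bit_hi, sub_16bit, sub_32bit };

uint64_t extractGPR(uint64_t reg, GPRSubReg idx) {
  switch (idx) {
  case GPRSubReg::sub_8bit:    return reg & 0xff;         // e.g. AL from RAX
  case GPRSubReg::sub_8bit_hi: return (reg >> 8) & 0xff;  // e.g. AH from RAX
  case GPRSubReg::sub_16bit:   return reg & 0xffff;       // e.g. AX from RAX
  case GPRSubReg::sub_32bit:   return reg & 0xffffffff;   // e.g. EAX from RAX
  }
  return 0;
}

int main() {
  assert(extractGPR(0x1122334455667788ull, GPRSubReg::sub_8bit_hi) == 0x77);
  assert(extractGPR(0x1122334455667788ull, GPRSubReg::sub_16bit) == 0x7788);
  return 0;
}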
@@ -370,7 +308,7 @@ def GR8 : RegisterClass<"X86", [i8], 8, def GR16 : RegisterClass<"X86", [i16], 16, [AX, CX, DX, SI, DI, BX, BP, SP, R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> { - let SubRegClassList = [GR8, GR8]; + let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -422,7 +360,7 @@ def GR16 : RegisterClass<"X86", [i16], 16, def GR32 : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> { - let SubRegClassList = [GR8, GR8, GR16]; + let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -477,7 +415,9 @@ def GR32 : RegisterClass<"X86", [i32], 32, def GR64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, RBX, R14, R15, R12, R13, RBP, RSP, RIP]> { - let SubRegClassList = [GR8, GR8, GR16, GR32]; + let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), + (GR16 sub_16bit), + (GR32 sub_32bit)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -511,14 +451,8 @@ def DEBUG_REG : RegisterClass<"X86", [i32], 32, } // Control registers. -def CONTROL_REG_32 : RegisterClass<"X86", [i32], 32, - [ECR0, ECR1, ECR2, ECR3, ECR4, ECR5, ECR6, - ECR7]> { -} - -def CONTROL_REG_64 : RegisterClass<"X86", [i64], 64, - [RCR0, RCR1, RCR2, RCR3, RCR4, RCR5, RCR6, - RCR7, RCR8]> { +def CONTROL_REG : RegisterClass<"X86", [i64], 64, + [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8]> { } // GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of @@ -532,20 +466,27 @@ def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> { def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]> { } def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> { - let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H]; + let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)]; } def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> { - let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD]; + let SubRegClasses = [(GR8_ABCD_L sub_8bit), + (GR8_ABCD_H sub_8bit_hi), + (GR16_ABCD sub_16bit)]; } def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> { - let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD]; + let SubRegClasses = [(GR8_ABCD_L sub_8bit), + (GR8_ABCD_H sub_8bit_hi), + (GR16_ABCD sub_16bit), + (GR32_ABCD sub_32bit)]; } def GR32_TC : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX]> { - let SubRegClassList = [GR8, GR8, GR16]; + let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; } def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, R8, R9, R11]> { - let SubRegClassList = [GR8, GR8, GR16, GR32_TC]; + let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), + (GR16 sub_16bit), + (GR32_TC sub_32bit)]; } // GR8_NOREX - GR8 registers which do not require a REX prefix. @@ -585,7 +526,7 @@ def GR8_NOREX : RegisterClass<"X86", [i8], 8, // GR16_NOREX - GR16 registers which do not require a REX prefix. 
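For context on what the *_NOREX classes (GR8_NOREX above, GR16_NOREX next, and their 32/64-bit siblings) guard against: on x86-64, r8-r15 and the uniform byte registers sil/dil/bpl/spl are only encodable with a REX prefix, while the legacy high-byte registers ah/ch/dh/bh cannot appear in an instruction that has one, so instructions that must stay REX-free allocate from these restricted classes. A rough classifier sketch (my summary of the encoding rule, not LLVM code; it ignores xmm8-15 and other REX.R cases):

#include <cassert>
#include <cctype>
#include <string>

// Does naming this register force a REX prefix on x86-64?
bool needsREX(const std::string &reg) {
  // r8..r15 and their w/d/b sub-registers always need REX.
  if (reg.size() >= 2 && reg[0] == 'r' &&
      std::isdigit(static_cast<unsigned char>(reg[1])))
    return true;
  // The uniform byte registers exist only under a REX prefix.
  return reg == "sil" || reg == "dil" || reg == "bpl" || reg == "spl";
}

int main() {
  assert(needsREX("r8"));
  assert(needsREX("sil"));
  assert(!needsREX("ax"));
  return 0;
}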
def GR16_NOREX : RegisterClass<"X86", [i16], 16, [AX, CX, DX, SI, DI, BX, BP, SP]> { - let SubRegClassList = [GR8_NOREX, GR8_NOREX]; + let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -608,7 +549,8 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16, // GR32_NOREX - GR32 registers which do not require a REX prefix. def GR32_NOREX : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> { - let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX]; + let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi), + (GR16_NOREX sub_16bit)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -631,7 +573,9 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32, // GR64_NOREX - GR64 registers which do not require a REX prefix. def GR64_NOREX : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP]> { - let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX]; + let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi), + (GR16_NOREX sub_16bit), + (GR32_NOREX sub_32bit)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -656,7 +600,7 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64, def GR32_NOSP : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, ESI, EDI, EBX, EBP, R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> { - let SubRegClassList = [GR8, GR8, GR16]; + let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -709,7 +653,9 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32, def GR64_NOSP : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, RBX, R14, R15, R12, R13, RBP]> { - let SubRegClassList = [GR8, GR8, GR16, GR32_NOSP]; + let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), + (GR16 sub_16bit), + (GR32_NOSP sub_32bit)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -734,7 +680,9 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64, // GR64_NOREX_NOSP - GR64_NOREX registers except RSP. def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, RBX, RBP]> { - let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX]; + let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi), + (GR16_NOREX sub_16bit), + (GR32_NOREX sub_32bit)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -758,7 +706,9 @@ def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64, // A class to support the 'A' assembler constraint: EAX then EDX. def GR32_AD : RegisterClass<"X86", [i32], 32, [EAX, EDX]> { - let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD]; + let SubRegClasses = [(GR8_ABCD_L sub_8bit), + (GR8_ABCD_H sub_8bit_hi), + (GR16_ABCD sub_16bit)]; } // Scalar SSE2 floating point registers. 
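The next hunk gives VR128 its FR32/FR64 sub-register classes via sub_ss and sub_sd. As the register definitions earlier in this patch note, these are not smaller physical registers: an XMM register viewed through sub_ss is its low 32 bits as a scalar float, and through sub_sd its low 64 bits as a scalar double. A byte-level sketch of the two views (plain C++, assuming little-endian lane order; XMM/viewSS/viewSD are illustrative names):

#include <cassert>
#include <cstdint>
#include <cstring>

struct XMM { uint8_t bytes[16]; };  // one 128-bit register

// sub_ss view: low 32 bits reinterpreted as f32.
float viewSS(const XMM &r) { float f; std::memcpy(&f, r.bytes, sizeof f); return f; }
// sub_sd view: low 64 bits reinterpreted as f64.
double viewSD(const XMM &r) { double d; std::memcpy(&d, r.bytes, sizeof d); return d; }

int main() {
  XMM r = {};
  float f = 1.5f;
  std::memcpy(r.bytes, &f, sizeof f);  // write through the sub_ss view
  assert(viewSS(r) == 1.5f);           // same storage, narrower view
  (void)viewSD;
  return 0;
}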
@@ -836,7 +786,8 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]> { - let SubRegClassList = [FR32, FR64]; + let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)]; + let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -856,7 +807,7 @@ def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15]> { - let SubRegClassList = [FR32, FR64, VR128]; + let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)]; } // Status flags registers. diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index cd87b82..6297a27 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -12,11 +12,232 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "x86-selectiondag-info" -#include "X86SelectionDAGInfo.h" +#include "X86TargetMachine.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/SelectionDAG.h" using namespace llvm; -X86SelectionDAGInfo::X86SelectionDAGInfo() { +X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) : + TargetSelectionDAGInfo(TM), + Subtarget(&TM.getSubtarget<X86Subtarget>()), + TLI(*TM.getTargetLowering()) { } X86SelectionDAGInfo::~X86SelectionDAGInfo() { } + +SDValue +X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, + const Value *DstSV, + uint64_t DstSVOff) const { + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + + // If not DWORD aligned or size is more than the threshold, call the library. + // The libc version is likely to be faster for these cases. It can use the + // address value and run time information about the CPU. + if ((Align & 3) != 0 || + !ConstantSize || + ConstantSize->getZExtValue() > + Subtarget->getMaxInlineSizeThreshold()) { + SDValue InFlag(0, 0); + + // Check to see if there is a specialized entry-point for memory zeroing. + ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); + + if (const char *bzeroEntry = V && + V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { + EVT IntPtr = TLI.getPointerTy(); + const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Dst; + Entry.Ty = IntPtrTy; + Args.push_back(Entry); + Entry.Node = Size; + Args.push_back(Entry); + std::pair<SDValue,SDValue> CallResult = + TLI.LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), + false, false, false, false, + 0, CallingConv::C, false, /*isReturnValueUsed=*/false, + DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, + DAG, dl); + return CallResult.second; + } + + // Otherwise have the target-independent code call memset. + return SDValue(); + } + + uint64_t SizeVal = ConstantSize->getZExtValue(); + SDValue InFlag(0, 0); + EVT AVT; + SDValue Count; + ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); + unsigned BytesLeft = 0; + bool TwoRepStos = false; + if (ValC) { + unsigned ValReg; + uint64_t Val = ValC->getZExtValue() & 255; + + // If the value is a constant, then we can potentially use larger sets. 
+ switch (Align & 3) { + case 2: // WORD aligned + AVT = MVT::i16; + ValReg = X86::AX; + Val = (Val << 8) | Val; + break; + case 0: // DWORD aligned + AVT = MVT::i32; + ValReg = X86::EAX; + Val = (Val << 8) | Val; + Val = (Val << 16) | Val; + if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned + AVT = MVT::i64; + ValReg = X86::RAX; + Val = (Val << 32) | Val; + } + break; + default: // Byte aligned + AVT = MVT::i8; + ValReg = X86::AL; + Count = DAG.getIntPtrConstant(SizeVal); + break; + } + + if (AVT.bitsGT(MVT::i8)) { + unsigned UBytes = AVT.getSizeInBits() / 8; + Count = DAG.getIntPtrConstant(SizeVal / UBytes); + BytesLeft = SizeVal % UBytes; + } + + Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT), + InFlag); + InFlag = Chain.getValue(1); + } else { + AVT = MVT::i8; + Count = DAG.getIntPtrConstant(SizeVal); + Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag); + InFlag = Chain.getValue(1); + } + + Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : + X86::ECX, + Count, InFlag); + InFlag = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : + X86::EDI, + Dst, InFlag); + InFlag = Chain.getValue(1); + + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); + SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; + Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); + + if (TwoRepStos) { + InFlag = Chain.getValue(1); + Count = Size; + EVT CVT = Count.getValueType(); + SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, + DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); + Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : + X86::ECX, + Left, InFlag); + InFlag = Chain.getValue(1); + Tys = DAG.getVTList(MVT::Other, MVT::Flag); + SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; + Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); + } else if (BytesLeft) { + // Handle the last 1 - 7 bytes. + unsigned Offset = SizeVal - BytesLeft; + EVT AddrVT = Dst.getValueType(); + EVT SizeVT = Size.getValueType(); + + Chain = DAG.getMemset(Chain, dl, + DAG.getNode(ISD::ADD, dl, AddrVT, Dst, + DAG.getConstant(Offset, AddrVT)), + Src, + DAG.getConstant(BytesLeft, SizeVT), + Align, isVolatile, DstSV, DstSVOff + Offset); + } + + // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. + return Chain; +} + +SDValue +X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const { + // This requires the copy size to be a constant, preferrably + // within a subtarget-specific limit. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDValue(); + uint64_t SizeVal = ConstantSize->getZExtValue(); + if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) + return SDValue(); + + /// If not DWORD aligned, call the library. + if ((Align & 3) != 0) + return SDValue(); + + // DWORD aligned + EVT AVT = MVT::i32; + if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned + AVT = MVT::i64; + + unsigned UBytes = AVT.getSizeInBits() / 8; + unsigned CountVal = SizeVal / UBytes; + SDValue Count = DAG.getIntPtrConstant(CountVal); + unsigned BytesLeft = SizeVal % UBytes; + + SDValue InFlag(0, 0); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? 
X86::RCX : + X86::ECX, + Count, InFlag); + InFlag = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : + X86::EDI, + Dst, InFlag); + InFlag = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI : + X86::ESI, + Src, InFlag); + InFlag = Chain.getValue(1); + + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); + SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; + SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops, + array_lengthof(Ops)); + + SmallVector<SDValue, 4> Results; + Results.push_back(RepMovs); + if (BytesLeft) { + // Handle the last 1 - 7 bytes. + unsigned Offset = SizeVal - BytesLeft; + EVT DstVT = Dst.getValueType(); + EVT SrcVT = Src.getValueType(); + EVT SizeVT = Size.getValueType(); + Results.push_back(DAG.getMemcpy(Chain, dl, + DAG.getNode(ISD::ADD, dl, DstVT, Dst, + DAG.getConstant(Offset, DstVT)), + DAG.getNode(ISD::ADD, dl, SrcVT, Src, + DAG.getConstant(Offset, SrcVT)), + DAG.getConstant(BytesLeft, SizeVT), + Align, isVolatile, AlwaysInline, + DstSV, DstSVOff + Offset, + SrcSV, SrcSVOff + Offset)); + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Results[0], Results.size()); +} diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h index 9834754..4f30f31 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.h +++ b/lib/Target/X86/X86SelectionDAGInfo.h @@ -18,10 +18,40 @@ namespace llvm { +class X86TargetLowering; +class X86TargetMachine; +class X86Subtarget; + class X86SelectionDAGInfo : public TargetSelectionDAGInfo { + /// Subtarget - Keep a pointer to the X86Subtarget around so that we can + /// make the right decision when generating code for different targets. + const X86Subtarget *Subtarget; + + const X86TargetLowering &TLI; + public: - X86SelectionDAGInfo(); + explicit X86SelectionDAGInfo(const X86TargetMachine &TM); ~X86SelectionDAGInfo(); + + virtual + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, + const Value *DstSV, + uint64_t DstSVOff) const; + + virtual + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const; }; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index f39904e..f2c5058 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -17,17 +17,13 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" -#include "llvm/Support/CommandLine.h" - using namespace llvm; -static cl::opt<bool> DisableSSEDomain("disable-sse-domain", - cl::init(false), cl::Hidden, - cl::desc("Disable SSE Domain Fixing")); - static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { @@ -43,6 +39,18 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { } } +static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, + MCContext &Ctx, TargetAsmBackend &TAB, + raw_ostream &_OS, + MCCodeEmitter *_Emitter, + bool RelaxAll) 
{ + Triple TheTriple(TT); + switch (TheTriple.getOS()) { + default: + return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll); + } +} + extern "C" void LLVMInitializeX86Target() { // Register the target. RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target); @@ -63,6 +71,12 @@ extern "C" void LLVMInitializeX86Target() { createX86_32AsmBackend); TargetRegistry::RegisterAsmBackend(TheX86_64Target, createX86_64AsmBackend); + + // Register the object streamer. + TargetRegistry::RegisterObjectStreamer(TheX86_32Target, + createMCStreamer); + TargetRegistry::RegisterObjectStreamer(TheX86_64Target, + createMCStreamer); } @@ -88,7 +102,8 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, Subtarget.getStackAlignment(), (Subtarget.isTargetWin64() ? -40 : (Subtarget.is64Bit() ? -8 : -4))), - InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) { + InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this), + ELFWriterInfo(*this) { DefRelocModel = getRelocationModel(); // If no relocation model was picked, default as appropriate for the target. @@ -178,8 +193,7 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2() && - !DisableSSEDomain) { + if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) { PM.add(createSSEDomainFixPass()); return true; } diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index dc4234c..f9fb424 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -23,6 +23,7 @@ #include "X86JITInfo.h" #include "X86Subtarget.h" #include "X86ISelLowering.h" +#include "X86SelectionDAGInfo.h" namespace llvm { @@ -35,6 +36,7 @@ class X86TargetMachine : public LLVMTargetMachine { X86InstrInfo InstrInfo; X86JITInfo JITInfo; X86TargetLowering TLInfo; + X86SelectionDAGInfo TSInfo; X86ELFWriterInfo ELFWriterInfo; Reloc::Model DefRelocModel; // Reloc model before it's overridden. @@ -54,6 +56,9 @@ public: virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; } + virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { + return &TSInfo; + } virtual const X86RegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); }
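To summarize the EmitTargetCodeForMemset lowering shown earlier in runnable form: for a constant fill byte, the value is replicated up to the widest store unit the alignment allows, REP STOS runs size/unit iterations, and the size%unit leftover bytes (at most 7) are finished by a trailing memset. A simplified standalone model (the alignment cases mirror the `switch (Align & 3)` in the patch; RepStosPlan/planMemset are illustrative names, and the real code also handles non-constant bytes, the bzero entry point, and the inline-size threshold):

#include <cstdint>
#include <cstdio>

struct RepStosPlan {
  uint64_t pattern;  // replicated fill value destined for AL/AX/EAX/RAX
  unsigned unit;     // store width in bytes
  uint64_t count;    // REP iteration count (goes in ECX/RCX)
  uint64_t left;     // trailing bytes handled by a follow-up memset
};

RepStosPlan planMemset(uint8_t byte, uint64_t size, unsigned align, bool is64Bit) {
  uint64_t val = byte;
  unsigned unit = 1;                      // byte aligned: plain "rep stosb"
  if ((align & 3) == 0) {                 // DWORD aligned
    val = (val << 8) | val;
    val = (val << 16) | val;
    unit = 4;
    if (is64Bit && (align & 7) == 0) {    // QWORD aligned
      val = (val << 32) | val;
      unit = 8;
    }
  } else if ((align & 3) == 2) {          // WORD aligned
    val = (val << 8) | val;
    unit = 2;
  }
  return {val, unit, size / unit, size % unit};
}

int main() {
  RepStosPlan p = planMemset(0xAB, 37, 8, true);
  // Expect pattern abababababababab, 4 qword stores, 5 leftover bytes.
  std::printf("pattern=%016llx unit=%u count=%llu left=%llu\n",
              (unsigned long long)p.pattern, p.unit,
              (unsigned long long)p.count, (unsigned long long)p.left);
  return 0;
}

EmitTargetCodeForMemcpy follows the same shape with REP MOVS: count in RCX/ECX, destination in RDI/EDI, source in RSI/ESI, and the sub-unit remainder emitted as a small trailing memcpy joined by a TokenFactor.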