From cd749a9c07f1de2fb8affde90537efa4bc3e7c54 Mon Sep 17 00:00:00 2001
From: rdivacky
Date: Wed, 14 Oct 2009 17:57:32 +0000
Subject: Update llvm to r84119.

---
 lib/Target/X86/X86InstrInfo.cpp | 1336 ++++++++++++++++++++-------------------
 1 file changed, 689 insertions(+), 647 deletions(-)

diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index e5d84c5..e8a39d1 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -18,8 +18,8 @@
 #include "X86MachineFunctionInfo.h"
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
-#include "llvm/GlobalVariable.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -27,24 +27,24 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
 
 using namespace llvm;
 
-namespace {
-  cl::opt<bool>
-  NoFusing("disable-spill-fusing",
-           cl::desc("Disable fusing of spill code into instructions"));
-  cl::opt<bool>
-  PrintFailedFusing("print-failed-fuse-candidates",
-                    cl::desc("Print instructions that the allocator wants to"
-                             " fuse, but the X86 backend currently can't"),
-                    cl::Hidden);
-  cl::opt<bool>
-  ReMatPICStubLoad("remat-pic-stub-load",
-                   cl::desc("Re-materialize load from stub in PIC mode"),
-                   cl::init(false), cl::Hidden);
-}
+static cl::opt<bool>
+NoFusing("disable-spill-fusing",
+         cl::desc("Disable fusing of spill code into instructions"));
+static cl::opt<bool>
+PrintFailedFusing("print-failed-fuse-candidates",
+                  cl::desc("Print instructions that the allocator wants to"
+                           " fuse, but the X86 backend currently can't"),
+                  cl::Hidden);
+static cl::opt<bool>
+ReMatPICStubLoad("remat-pic-stub-load",
+                 cl::desc("Re-materialize load from stub in PIC mode"),
+                 cl::init(false), cl::Hidden);
 
 X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
@@ -212,9 +212,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     unsigned RegOp = OpTbl2Addr[i][0];
     unsigned MemOp = OpTbl2Addr[i][1];
     if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp,
-                                                     MemOp)).second)
+                                                     std::make_pair(MemOp,0))).second)
       assert(false && "Duplicated entries?");
-    unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); // Index 0,folded load and store
+    // Index 0, folded load and store, no alignment requirement.
+    unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
     if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                                 std::make_pair(RegOp, AuxInfo))).second)
@@ -222,93 +223,94 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   }
 
   // If the third value is 1, then it's folding either a load or a store.
-  static const unsigned OpTbl0[][3] = {
-    { X86::BT16ri8, X86::BT16mi8, 1 },
-    { X86::BT32ri8, X86::BT32mi8, 1 },
-    { X86::BT64ri8, X86::BT64mi8, 1 },
-    { X86::CALL32r, X86::CALL32m, 1 },
-    { X86::CALL64r, X86::CALL64m, 1 },
-    { X86::CMP16ri, X86::CMP16mi, 1 },
-    { X86::CMP16ri8, X86::CMP16mi8, 1 },
-    { X86::CMP16rr, X86::CMP16mr, 1 },
-    { X86::CMP32ri, X86::CMP32mi, 1 },
-    { X86::CMP32ri8, X86::CMP32mi8, 1 },
-    { X86::CMP32rr, X86::CMP32mr, 1 },
-    { X86::CMP64ri32, X86::CMP64mi32, 1 },
-    { X86::CMP64ri8, X86::CMP64mi8, 1 },
-    { X86::CMP64rr, X86::CMP64mr, 1 },
-    { X86::CMP8ri, X86::CMP8mi, 1 },
-    { X86::CMP8rr, X86::CMP8mr, 1 },
-    { X86::DIV16r, X86::DIV16m, 1 },
-    { X86::DIV32r, X86::DIV32m, 1 },
-    { X86::DIV64r, X86::DIV64m, 1 },
-    { X86::DIV8r, X86::DIV8m, 1 },
-    { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0 },
-    { X86::FsMOVAPDrr, X86::MOVSDmr, 0 },
-    { X86::FsMOVAPSrr, X86::MOVSSmr, 0 },
-    { X86::IDIV16r, X86::IDIV16m, 1 },
-    { X86::IDIV32r, X86::IDIV32m, 1 },
-    { X86::IDIV64r, X86::IDIV64m, 1 },
-    { X86::IDIV8r, X86::IDIV8m, 1 },
-    { X86::IMUL16r, X86::IMUL16m, 1 },
-    { X86::IMUL32r, X86::IMUL32m, 1 },
-    { X86::IMUL64r, X86::IMUL64m, 1 },
-    { X86::IMUL8r, X86::IMUL8m, 1 },
-    { X86::JMP32r, X86::JMP32m, 1 },
-    { X86::JMP64r, X86::JMP64m, 1 },
-    { X86::MOV16ri, X86::MOV16mi, 0 },
-    { X86::MOV16rr, X86::MOV16mr, 0 },
-    { X86::MOV32ri, X86::MOV32mi, 0 },
-    { X86::MOV32rr, X86::MOV32mr, 0 },
-    { X86::MOV64ri32, X86::MOV64mi32, 0 },
-    { X86::MOV64rr, X86::MOV64mr, 0 },
-    { X86::MOV8ri, X86::MOV8mi, 0 },
-    { X86::MOV8rr, X86::MOV8mr, 0 },
-    { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0 },
-    { X86::MOVAPDrr, X86::MOVAPDmr, 0 },
-    { X86::MOVAPSrr, X86::MOVAPSmr, 0 },
-    { X86::MOVDQArr, X86::MOVDQAmr, 0 },
-    { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0 },
-    { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0 },
-    { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0 },
-    { X86::MOVSDrr, X86::MOVSDmr, 0 },
-    { X86::MOVSDto64rr, X86::MOVSDto64mr, 0 },
-    { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0 },
-    { X86::MOVSSrr, X86::MOVSSmr, 0 },
-    { X86::MOVUPDrr, X86::MOVUPDmr, 0 },
-    { X86::MOVUPSrr, X86::MOVUPSmr, 0 },
-    { X86::MUL16r, X86::MUL16m, 1 },
-    { X86::MUL32r, X86::MUL32m, 1 },
-    { X86::MUL64r, X86::MUL64m, 1 },
-    { X86::MUL8r, X86::MUL8m, 1 },
-    { X86::SETAEr, X86::SETAEm, 0 },
-    { X86::SETAr, X86::SETAm, 0 },
-    { X86::SETBEr, X86::SETBEm, 0 },
-    { X86::SETBr, X86::SETBm, 0 },
-    { X86::SETEr, X86::SETEm, 0 },
-    { X86::SETGEr, X86::SETGEm, 0 },
-    { X86::SETGr, X86::SETGm, 0 },
-    { X86::SETLEr, X86::SETLEm, 0 },
-    { X86::SETLr, X86::SETLm, 0 },
-    { X86::SETNEr, X86::SETNEm, 0 },
-    { X86::SETNOr, X86::SETNOm, 0 },
-    { X86::SETNPr, X86::SETNPm, 0 },
-    { X86::SETNSr, X86::SETNSm, 0 },
-    { X86::SETOr, X86::SETOm, 0 },
-    { X86::SETPr, X86::SETPm, 0 },
-    { X86::SETSr, X86::SETSm, 0 },
-    { X86::TAILJMPr, X86::TAILJMPm, 1 },
-    { X86::TEST16ri, X86::TEST16mi, 1 },
-    { X86::TEST32ri, X86::TEST32mi, 1 },
-    { X86::TEST64ri32, X86::TEST64mi32, 1 },
-    { X86::TEST8ri, X86::TEST8mi, 1 }
+  static const unsigned OpTbl0[][4] = {
+    { X86::BT16ri8, X86::BT16mi8, 1, 0 },
+    { X86::BT32ri8, X86::BT32mi8, 1, 0 },
+    { X86::BT64ri8, X86::BT64mi8, 1, 0 },
+    { X86::CALL32r, X86::CALL32m, 1, 0 },
+    { X86::CALL64r, X86::CALL64m, 1, 0 },
+    { X86::CMP16ri, X86::CMP16mi, 1, 0 },
+    { X86::CMP16ri8, X86::CMP16mi8, 1, 0 },
+    { X86::CMP16rr, X86::CMP16mr, 1, 0 },
+    { X86::CMP32ri, X86::CMP32mi, 1, 0 },
+    { X86::CMP32ri8, X86::CMP32mi8, 1, 0 },
+    { X86::CMP32rr, X86::CMP32mr, 1, 0 },
+    { X86::CMP64ri32, X86::CMP64mi32, 1, 0 },
+    { X86::CMP64ri8, X86::CMP64mi8, 1, 0 },
+    { X86::CMP64rr, X86::CMP64mr, 1, 0 },
+    { X86::CMP8ri, X86::CMP8mi, 1, 0 },
+    { X86::CMP8rr, X86::CMP8mr, 1, 0 },
+    { X86::DIV16r, X86::DIV16m, 1, 0 },
+    { X86::DIV32r, X86::DIV32m, 1, 0 },
+    { X86::DIV64r, X86::DIV64m, 1, 0 },
+    { X86::DIV8r, X86::DIV8m, 1, 0 },
+    { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
+    { X86::FsMOVAPDrr, X86::MOVSDmr, 0, 0 },
+    { X86::FsMOVAPSrr, X86::MOVSSmr, 0, 0 },
+    { X86::IDIV16r, X86::IDIV16m, 1, 0 },
+    { X86::IDIV32r, X86::IDIV32m, 1, 0 },
+    { X86::IDIV64r, X86::IDIV64m, 1, 0 },
+    { X86::IDIV8r, X86::IDIV8m, 1, 0 },
+    { X86::IMUL16r, X86::IMUL16m, 1, 0 },
+    { X86::IMUL32r, X86::IMUL32m, 1, 0 },
+    { X86::IMUL64r, X86::IMUL64m, 1, 0 },
+    { X86::IMUL8r, X86::IMUL8m, 1, 0 },
+    { X86::JMP32r, X86::JMP32m, 1, 0 },
+    { X86::JMP64r, X86::JMP64m, 1, 0 },
+    { X86::MOV16ri, X86::MOV16mi, 0, 0 },
+    { X86::MOV16rr, X86::MOV16mr, 0, 0 },
+    { X86::MOV32ri, X86::MOV32mi, 0, 0 },
+    { X86::MOV32rr, X86::MOV32mr, 0, 0 },
+    { X86::MOV64ri32, X86::MOV64mi32, 0, 0 },
+    { X86::MOV64rr, X86::MOV64mr, 0, 0 },
+    { X86::MOV8ri, X86::MOV8mi, 0, 0 },
+    { X86::MOV8rr, X86::MOV8mr, 0, 0 },
+    { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 },
+    { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 },
+    { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 },
+    { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 },
+    { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
+    { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 },
+    { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0, 0 },
+    { X86::MOVSDrr, X86::MOVSDmr, 0, 0 },
+    { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
+    { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 },
+    { X86::MOVSSrr, X86::MOVSSmr, 0, 0 },
+    { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 },
+    { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 },
+    { X86::MUL16r, X86::MUL16m, 1, 0 },
+    { X86::MUL32r, X86::MUL32m, 1, 0 },
+    { X86::MUL64r, X86::MUL64m, 1, 0 },
+    { X86::MUL8r, X86::MUL8m, 1, 0 },
+    { X86::SETAEr, X86::SETAEm, 0, 0 },
+    { X86::SETAr, X86::SETAm, 0, 0 },
+    { X86::SETBEr, X86::SETBEm, 0, 0 },
+    { X86::SETBr, X86::SETBm, 0, 0 },
+    { X86::SETEr, X86::SETEm, 0, 0 },
+    { X86::SETGEr, X86::SETGEm, 0, 0 },
+    { X86::SETGr, X86::SETGm, 0, 0 },
+    { X86::SETLEr, X86::SETLEm, 0, 0 },
+    { X86::SETLr, X86::SETLm, 0, 0 },
+    { X86::SETNEr, X86::SETNEm, 0, 0 },
+    { X86::SETNOr, X86::SETNOm, 0, 0 },
+    { X86::SETNPr, X86::SETNPm, 0, 0 },
+    { X86::SETNSr, X86::SETNSm, 0, 0 },
+    { X86::SETOr, X86::SETOm, 0, 0 },
+    { X86::SETPr, X86::SETPm, 0, 0 },
+    { X86::SETSr, X86::SETSm, 0, 0 },
+    { X86::TAILJMPr, X86::TAILJMPm, 1, 0 },
+    { X86::TEST16ri, X86::TEST16mi, 1, 0 },
+    { X86::TEST32ri, X86::TEST32mi, 1, 0 },
+    { X86::TEST64ri32, X86::TEST64mi32, 1, 0 },
+    { X86::TEST8ri, X86::TEST8mi, 1, 0 }
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
     unsigned RegOp = OpTbl0[i][0];
     unsigned MemOp = OpTbl0[i][1];
+    unsigned Align = OpTbl0[i][3];
     if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp,
-                                                 MemOp)).second)
+                                                 std::make_pair(MemOp,Align))).second)
       assert(false && "Duplicated entries?");
     unsigned FoldedLoad = OpTbl0[i][2];
     // Index 0, folded load or store.
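[Reviewer note] The fold tables pack a fair amount of state into plain unsigned values, so as a reading aid, here is a minimal C++ sketch of the two encodings — not code from this patch, and the helper names are invented. The forward tables now map a register-form opcode to a (memory-form opcode, minimum alignment) pair, while MemOp2RegOpTable packs the folded operand index into bits 0-3 of AuxInfo, with bit 4 flagging a folded load and bit 5 a folded store, matching the `0 | (1 << 4) | (1 << 5)` expressions in the surrounding code.

#include <utility>

// (memory-form opcode, minimum operand alignment in bytes; 0 = none)
typedef std::pair<unsigned, unsigned> MemOpEntry;

struct AuxInfoFields {
  unsigned OpndIndex;  // bits 0-3: which operand the memory form replaces
  bool FoldedLoad;     // bit 4: the memory form folds a load
  bool FoldedStore;    // bit 5: the memory form folds a store
};

static AuxInfoFields decodeAuxInfo(unsigned AuxInfo) {
  AuxInfoFields F;
  F.OpndIndex   = AuxInfo & 0xf;
  F.FoldedLoad  = (AuxInfo & (1 << 4)) != 0;
  F.FoldedStore = (AuxInfo & (1 << 5)) != 0;
  return F;
}

// A fold is only legal if the operand's known alignment meets the table's
// minimum, mirroring the "if (Align < MinAlign) return NULL;" check that
// this patch adds to foldMemoryOperandImpl further below.
static bool alignmentAllowsFold(const MemOpEntry &E, unsigned Align) {
  return Align >= E.second;
}

This is why the per-opcode alignment switch statements later in the patch can be deleted: the requirement moves into the fourth (OpTbl0) or third (OpTbl1/OpTbl2) table column.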
@@ -319,338 +321,342 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
       AmbEntries.push_back(MemOp);
   }
 
-  static const unsigned OpTbl1[][2] = {
-    { X86::CMP16rr, X86::CMP16rm },
-    { X86::CMP32rr, X86::CMP32rm },
-    { X86::CMP64rr, X86::CMP64rm },
-    { X86::CMP8rr, X86::CMP8rm },
-    { X86::CVTSD2SSrr, X86::CVTSD2SSrm },
-    { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm },
-    { X86::CVTSI2SDrr, X86::CVTSI2SDrm },
-    { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm },
-    { X86::CVTSI2SSrr, X86::CVTSI2SSrm },
-    { X86::CVTSS2SDrr, X86::CVTSS2SDrm },
-    { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm },
-    { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm },
-    { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm },
-    { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm },
-    { X86::FsMOVAPDrr, X86::MOVSDrm },
-    { X86::FsMOVAPSrr, X86::MOVSSrm },
-    { X86::IMUL16rri, X86::IMUL16rmi },
-    { X86::IMUL16rri8, X86::IMUL16rmi8 },
-    { X86::IMUL32rri, X86::IMUL32rmi },
-    { X86::IMUL32rri8, X86::IMUL32rmi8 },
-    { X86::IMUL64rri32, X86::IMUL64rmi32 },
-    { X86::IMUL64rri8, X86::IMUL64rmi8 },
-    { X86::Int_CMPSDrr, X86::Int_CMPSDrm },
-    { X86::Int_CMPSSrr, X86::Int_CMPSSrm },
-    { X86::Int_COMISDrr, X86::Int_COMISDrm },
-    { X86::Int_COMISSrr, X86::Int_COMISSrm },
-    { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm },
-    { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm },
-    { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm },
-    { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm },
-    { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm },
-    { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm },
-    { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm },
-    { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm },
-    { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm },
-    { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm },
-    { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm },
-    { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm },
-    { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm },
-    { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm },
-    { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm },
-    { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm },
-    { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm },
-    { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm },
-    { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm },
-    { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm },
-    { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm },
-    { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm },
-    { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm },
-    { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm },
-    { X86::MOV16rr, X86::MOV16rm },
-    { X86::MOV32rr, X86::MOV32rm },
-    { X86::MOV64rr, X86::MOV64rm },
-    { X86::MOV64toPQIrr, X86::MOVQI2PQIrm },
-    { X86::MOV64toSDrr, X86::MOV64toSDrm },
-    { X86::MOV8rr, X86::MOV8rm },
-    { X86::MOVAPDrr, X86::MOVAPDrm },
-    { X86::MOVAPSrr, X86::MOVAPSrm },
-    { X86::MOVDDUPrr, X86::MOVDDUPrm },
-    { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm },
-    { X86::MOVDI2SSrr, X86::MOVDI2SSrm },
-    { X86::MOVDQArr, X86::MOVDQArm },
-    { X86::MOVSD2PDrr, X86::MOVSD2PDrm },
-    { X86::MOVSDrr, X86::MOVSDrm },
-    { X86::MOVSHDUPrr, X86::MOVSHDUPrm },
-    { X86::MOVSLDUPrr, X86::MOVSLDUPrm },
-    { X86::MOVSS2PSrr, X86::MOVSS2PSrm },
-    { X86::MOVSSrr, X86::MOVSSrm },
-    { X86::MOVSX16rr8, X86::MOVSX16rm8 },
-    { X86::MOVSX32rr16, X86::MOVSX32rm16 },
-    { X86::MOVSX32rr8, X86::MOVSX32rm8 },
-    { X86::MOVSX64rr16, X86::MOVSX64rm16 },
-    { X86::MOVSX64rr32, X86::MOVSX64rm32 },
-    { X86::MOVSX64rr8, X86::MOVSX64rm8 },
-    { X86::MOVUPDrr, X86::MOVUPDrm },
-    { X86::MOVUPSrr, X86::MOVUPSrm },
-    { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm },
-    { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm },
-    { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm },
-    { X86::MOVZX16rr8, X86::MOVZX16rm8 },
-    { X86::MOVZX32rr16, X86::MOVZX32rm16 },
-    { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8 },
-    { X86::MOVZX32rr8, X86::MOVZX32rm8 },
-    { X86::MOVZX64rr16, X86::MOVZX64rm16 },
-    { X86::MOVZX64rr32, X86::MOVZX64rm32 },
-    { X86::MOVZX64rr8, X86::MOVZX64rm8 },
-    { X86::PSHUFDri, X86::PSHUFDmi },
-    { X86::PSHUFHWri, X86::PSHUFHWmi },
-    { X86::PSHUFLWri, X86::PSHUFLWmi },
-    { X86::RCPPSr, X86::RCPPSm },
-    { X86::RCPPSr_Int, X86::RCPPSm_Int },
-    { X86::RSQRTPSr, X86::RSQRTPSm },
-    { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int },
-    { X86::RSQRTSSr, X86::RSQRTSSm },
-    { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int },
-    { X86::SQRTPDr, X86::SQRTPDm },
-    { X86::SQRTPDr_Int, X86::SQRTPDm_Int },
-    { X86::SQRTPSr, X86::SQRTPSm },
-    { X86::SQRTPSr_Int, X86::SQRTPSm_Int },
-    { X86::SQRTSDr, X86::SQRTSDm },
-    { X86::SQRTSDr_Int, X86::SQRTSDm_Int },
-    { X86::SQRTSSr, X86::SQRTSSm },
-    { X86::SQRTSSr_Int, X86::SQRTSSm_Int },
-    { X86::TEST16rr, X86::TEST16rm },
-    { X86::TEST32rr, X86::TEST32rm },
-    { X86::TEST64rr, X86::TEST64rm },
-    { X86::TEST8rr, X86::TEST8rm },
+  static const unsigned OpTbl1[][3] = {
+    { X86::CMP16rr, X86::CMP16rm, 0 },
+    { X86::CMP32rr, X86::CMP32rm, 0 },
+    { X86::CMP64rr, X86::CMP64rm, 0 },
+    { X86::CMP8rr, X86::CMP8rm, 0 },
+    { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 },
+    { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 },
+    { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 },
+    { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 },
+    { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 },
+    { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 },
+    { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 },
+    { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
+    { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
+    { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
+    { X86::FsMOVAPDrr, X86::MOVSDrm, 0 },
+    { X86::FsMOVAPSrr, X86::MOVSSrm, 0 },
+    { X86::IMUL16rri, X86::IMUL16rmi, 0 },
+    { X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
+    { X86::IMUL32rri, X86::IMUL32rmi, 0 },
+    { X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
+    { X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
+    { X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
+    { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
+    { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
+    { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
+    { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
+    { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 },
+    { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, 16 },
+    { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, 16 },
+    { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 },
+    { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 },
+    { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
+    { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 },
+    { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm, 0 },
+    { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
+    { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
+    { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
+    { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
+    { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
+    { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
+    { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 },
+    { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 },
+    { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 },
+    { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 },
+    { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
+    { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
+    { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
+    { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 },
+    { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 },
+    { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 },
+    { X86::MOV16rr, X86::MOV16rm, 0 },
+    { X86::MOV32rr, X86::MOV32rm, 0 },
+    { X86::MOV64rr, X86::MOV64rm, 0 },
+    { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
+    { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
+    { X86::MOV8rr, X86::MOV8rm, 0 },
+    { X86::MOVAPDrr, X86::MOVAPDrm, 16 },
+    { X86::MOVAPSrr, X86::MOVAPSrm, 16 },
+    { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 },
+    { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
+    { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
+    { X86::MOVDQArr, X86::MOVDQArm, 16 },
+    { X86::MOVSD2PDrr, X86::MOVSD2PDrm, 0 },
+    { X86::MOVSDrr, X86::MOVSDrm, 0 },
+    { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 },
+    { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 },
+    { X86::MOVSS2PSrr, X86::MOVSS2PSrm, 0 },
+    { X86::MOVSSrr, X86::MOVSSrm, 0 },
+    { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
+    { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
+    { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
+    { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 },
+    { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 },
+    { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
+    { X86::MOVUPDrr, X86::MOVUPDrm, 16 },
+    { X86::MOVUPSrr, X86::MOVUPSrm, 16 },
+    { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 },
+    { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
+    { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 },
+    { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
+    { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
+    { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
+    { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
+    { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 },
+    { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 },
+    { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 },
+    { X86::PSHUFDri, X86::PSHUFDmi, 16 },
+    { X86::PSHUFHWri, X86::PSHUFHWmi, 16 },
+    { X86::PSHUFLWri, X86::PSHUFLWmi, 16 },
+    { X86::RCPPSr, X86::RCPPSm, 16 },
+    { X86::RCPPSr_Int, X86::RCPPSm_Int, 16 },
+    { X86::RSQRTPSr, X86::RSQRTPSm, 16 },
+    { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, 16 },
+    { X86::RSQRTSSr, X86::RSQRTSSm, 0 },
+    { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 },
+    { X86::SQRTPDr, X86::SQRTPDm, 16 },
+    { X86::SQRTPDr_Int, X86::SQRTPDm_Int, 16 },
+    { X86::SQRTPSr, X86::SQRTPSm, 16 },
+    { X86::SQRTPSr_Int, X86::SQRTPSm_Int, 16 },
+    { X86::SQRTSDr, X86::SQRTSDm, 0 },
+    { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 },
+    { X86::SQRTSSr, X86::SQRTSSm, 0 },
+    { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 },
+    { X86::TEST16rr, X86::TEST16rm, 0 },
+    { X86::TEST32rr, X86::TEST32rm, 0 },
+    { X86::TEST64rr, X86::TEST64rm, 0 },
+    { X86::TEST8rr, X86::TEST8rm, 0 },
     // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
-    { X86::UCOMISDrr, X86::UCOMISDrm },
-    { X86::UCOMISSrr, X86::UCOMISSrm }
+    { X86::UCOMISDrr, X86::UCOMISDrm, 0 },
+    { X86::UCOMISSrr, X86::UCOMISSrm, 0 }
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
     unsigned RegOp = OpTbl1[i][0];
     unsigned MemOp = OpTbl1[i][1];
+    unsigned Align = OpTbl1[i][2];
    if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp,
-                                                 MemOp)).second)
+                                                 std::make_pair(MemOp,Align))).second)
       assert(false && "Duplicated entries?");
-    unsigned AuxInfo = 1 | (1 << 4); // Index 1, folded load
+    // Index 1, folded load
+    unsigned AuxInfo = 1 | (1 << 4);
     if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
       if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                                   std::make_pair(RegOp, AuxInfo))).second)
         AmbEntries.push_back(MemOp);
   }
 
-  static const unsigned OpTbl2[][2] = {
-    { X86::ADC32rr, X86::ADC32rm },
-    { X86::ADC64rr, X86::ADC64rm },
-    { X86::ADD16rr, X86::ADD16rm },
-    { X86::ADD32rr, X86::ADD32rm },
-    { X86::ADD64rr, X86::ADD64rm },
-    { X86::ADD8rr, X86::ADD8rm },
-    { X86::ADDPDrr, X86::ADDPDrm },
-    { X86::ADDPSrr, X86::ADDPSrm },
-    { X86::ADDSDrr, X86::ADDSDrm },
-    { X86::ADDSSrr, X86::ADDSSrm },
-    { X86::ADDSUBPDrr, X86::ADDSUBPDrm },
-    { X86::ADDSUBPSrr, X86::ADDSUBPSrm },
-    { X86::AND16rr, X86::AND16rm },
-    { X86::AND32rr, X86::AND32rm },
-    { X86::AND64rr, X86::AND64rm },
-    { X86::AND8rr, X86::AND8rm },
-    { X86::ANDNPDrr, X86::ANDNPDrm },
-    { X86::ANDNPSrr, X86::ANDNPSrm },
-    { X86::ANDPDrr, X86::ANDPDrm },
-    { X86::ANDPSrr, X86::ANDPSrm },
-    { X86::CMOVA16rr, X86::CMOVA16rm },
-    { X86::CMOVA32rr, X86::CMOVA32rm },
-    { X86::CMOVA64rr, X86::CMOVA64rm },
-    { X86::CMOVAE16rr, X86::CMOVAE16rm },
-    { X86::CMOVAE32rr, X86::CMOVAE32rm },
-    { X86::CMOVAE64rr, X86::CMOVAE64rm },
-    { X86::CMOVB16rr, X86::CMOVB16rm },
-    { X86::CMOVB32rr, X86::CMOVB32rm },
-    { X86::CMOVB64rr, X86::CMOVB64rm },
-    { X86::CMOVBE16rr, X86::CMOVBE16rm },
-    { X86::CMOVBE32rr, X86::CMOVBE32rm },
-    { X86::CMOVBE64rr, X86::CMOVBE64rm },
-    { X86::CMOVE16rr, X86::CMOVE16rm },
-    { X86::CMOVE32rr, X86::CMOVE32rm },
-    { X86::CMOVE64rr, X86::CMOVE64rm },
-    { X86::CMOVG16rr, X86::CMOVG16rm },
-    { X86::CMOVG32rr, X86::CMOVG32rm },
-    { X86::CMOVG64rr, X86::CMOVG64rm },
-    { X86::CMOVGE16rr, X86::CMOVGE16rm },
-    { X86::CMOVGE32rr, X86::CMOVGE32rm },
-    { X86::CMOVGE64rr, X86::CMOVGE64rm },
-    { X86::CMOVL16rr, X86::CMOVL16rm },
-    { X86::CMOVL32rr, X86::CMOVL32rm },
-    { X86::CMOVL64rr, X86::CMOVL64rm },
-    { X86::CMOVLE16rr, X86::CMOVLE16rm },
-    { X86::CMOVLE32rr, X86::CMOVLE32rm },
-    { X86::CMOVLE64rr, X86::CMOVLE64rm },
-    { X86::CMOVNE16rr, X86::CMOVNE16rm },
-    { X86::CMOVNE32rr, X86::CMOVNE32rm },
-    { X86::CMOVNE64rr, X86::CMOVNE64rm },
-    { X86::CMOVNO16rr, X86::CMOVNO16rm },
-    { X86::CMOVNO32rr, X86::CMOVNO32rm },
-    { X86::CMOVNO64rr, X86::CMOVNO64rm },
-    { X86::CMOVNP16rr, X86::CMOVNP16rm },
-    { X86::CMOVNP32rr, X86::CMOVNP32rm },
-    { X86::CMOVNP64rr, X86::CMOVNP64rm },
-    { X86::CMOVNS16rr, X86::CMOVNS16rm },
-    { X86::CMOVNS32rr, X86::CMOVNS32rm },
-    { X86::CMOVNS64rr, X86::CMOVNS64rm },
-    { X86::CMOVO16rr, X86::CMOVO16rm },
-    { X86::CMOVO32rr, X86::CMOVO32rm },
-    { X86::CMOVO64rr, X86::CMOVO64rm },
-    { X86::CMOVP16rr, X86::CMOVP16rm },
-    { X86::CMOVP32rr, X86::CMOVP32rm },
-    { X86::CMOVP64rr, X86::CMOVP64rm },
-    { X86::CMOVS16rr, X86::CMOVS16rm },
-    { X86::CMOVS32rr, X86::CMOVS32rm },
-    { X86::CMOVS64rr, X86::CMOVS64rm },
-    { X86::CMPPDrri, X86::CMPPDrmi },
-    { X86::CMPPSrri, X86::CMPPSrmi },
-    { X86::CMPSDrr, X86::CMPSDrm },
-    { X86::CMPSSrr, X86::CMPSSrm },
-    { X86::DIVPDrr, X86::DIVPDrm },
-    { X86::DIVPSrr, X86::DIVPSrm },
-    { X86::DIVSDrr, X86::DIVSDrm },
-    { X86::DIVSSrr, X86::DIVSSrm },
-    { X86::FsANDNPDrr, X86::FsANDNPDrm },
-    { X86::FsANDNPSrr, X86::FsANDNPSrm },
-    { X86::FsANDPDrr, X86::FsANDPDrm },
-    { X86::FsANDPSrr, X86::FsANDPSrm },
-    { X86::FsORPDrr, X86::FsORPDrm },
-    { X86::FsORPSrr, X86::FsORPSrm },
-    { X86::FsXORPDrr, X86::FsXORPDrm },
-    { X86::FsXORPSrr, X86::FsXORPSrm },
-    { X86::HADDPDrr, X86::HADDPDrm },
-    { X86::HADDPSrr, X86::HADDPSrm },
-    { X86::HSUBPDrr, X86::HSUBPDrm },
-    { X86::HSUBPSrr, X86::HSUBPSrm },
-    { X86::IMUL16rr, X86::IMUL16rm },
-    { X86::IMUL32rr, X86::IMUL32rm },
-    { X86::IMUL64rr, X86::IMUL64rm },
-    { X86::MAXPDrr, X86::MAXPDrm },
-    { X86::MAXPDrr_Int, X86::MAXPDrm_Int },
-    { X86::MAXPSrr, X86::MAXPSrm },
-    { X86::MAXPSrr_Int, X86::MAXPSrm_Int },
-    { X86::MAXSDrr, X86::MAXSDrm },
-    { X86::MAXSDrr_Int, X86::MAXSDrm_Int },
-    { X86::MAXSSrr, X86::MAXSSrm },
-    { X86::MAXSSrr_Int, X86::MAXSSrm_Int },
-    { X86::MINPDrr, X86::MINPDrm },
-    { X86::MINPDrr_Int, X86::MINPDrm_Int },
-    { X86::MINPSrr, X86::MINPSrm },
-    { X86::MINPSrr_Int, X86::MINPSrm_Int },
-    { X86::MINSDrr, X86::MINSDrm },
-    { X86::MINSDrr_Int, X86::MINSDrm_Int },
-    { X86::MINSSrr, X86::MINSSrm },
-    { X86::MINSSrr_Int, X86::MINSSrm_Int },
-    { X86::MULPDrr, X86::MULPDrm },
-    { X86::MULPSrr, X86::MULPSrm },
-    { X86::MULSDrr, X86::MULSDrm },
-    { X86::MULSSrr, X86::MULSSrm },
-    { X86::OR16rr, X86::OR16rm },
-    { X86::OR32rr, X86::OR32rm },
-    { X86::OR64rr, X86::OR64rm },
-    { X86::OR8rr, X86::OR8rm },
-    { X86::ORPDrr, X86::ORPDrm },
-    { X86::ORPSrr, X86::ORPSrm },
-    { X86::PACKSSDWrr, X86::PACKSSDWrm },
-    { X86::PACKSSWBrr, X86::PACKSSWBrm },
-    { X86::PACKUSWBrr, X86::PACKUSWBrm },
-    { X86::PADDBrr, X86::PADDBrm },
-    { X86::PADDDrr, X86::PADDDrm },
-    { X86::PADDQrr, X86::PADDQrm },
-    { X86::PADDSBrr, X86::PADDSBrm },
-    { X86::PADDSWrr, X86::PADDSWrm },
-    { X86::PADDWrr, X86::PADDWrm },
-    { X86::PANDNrr, X86::PANDNrm },
-    { X86::PANDrr, X86::PANDrm },
-    { X86::PAVGBrr, X86::PAVGBrm },
-    { X86::PAVGWrr, X86::PAVGWrm },
-    { X86::PCMPEQBrr, X86::PCMPEQBrm },
-    { X86::PCMPEQDrr, X86::PCMPEQDrm },
-    { X86::PCMPEQWrr, X86::PCMPEQWrm },
-    { X86::PCMPGTBrr, X86::PCMPGTBrm },
-    { X86::PCMPGTDrr, X86::PCMPGTDrm },
-    { X86::PCMPGTWrr, X86::PCMPGTWrm },
-    { X86::PINSRWrri, X86::PINSRWrmi },
-    { X86::PMADDWDrr, X86::PMADDWDrm },
-    { X86::PMAXSWrr, X86::PMAXSWrm },
-    { X86::PMAXUBrr, X86::PMAXUBrm },
-    { X86::PMINSWrr, X86::PMINSWrm },
-    { X86::PMINUBrr, X86::PMINUBrm },
-    { X86::PMULDQrr, X86::PMULDQrm },
-    { X86::PMULHUWrr, X86::PMULHUWrm },
-    { X86::PMULHWrr, X86::PMULHWrm },
-    { X86::PMULLDrr, X86::PMULLDrm },
-    { X86::PMULLDrr_int, X86::PMULLDrm_int },
-    { X86::PMULLWrr, X86::PMULLWrm },
-    { X86::PMULUDQrr, X86::PMULUDQrm },
-    { X86::PORrr, X86::PORrm },
-    { X86::PSADBWrr, X86::PSADBWrm },
-    { X86::PSLLDrr, X86::PSLLDrm },
-    { X86::PSLLQrr, X86::PSLLQrm },
-    { X86::PSLLWrr, X86::PSLLWrm },
-    { X86::PSRADrr, X86::PSRADrm },
-    { X86::PSRAWrr, X86::PSRAWrm },
-    { X86::PSRLDrr, X86::PSRLDrm },
-    { X86::PSRLQrr, X86::PSRLQrm },
-    { X86::PSRLWrr, X86::PSRLWrm },
-    { X86::PSUBBrr, X86::PSUBBrm },
-    { X86::PSUBDrr, X86::PSUBDrm },
-    { X86::PSUBSBrr, X86::PSUBSBrm },
-    { X86::PSUBSWrr, X86::PSUBSWrm },
-    { X86::PSUBWrr, X86::PSUBWrm },
-    { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm },
-    { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm },
-    { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm },
-    { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm },
-    { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm },
-    { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm },
-    { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm },
-    { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm },
-    { X86::PXORrr, X86::PXORrm },
-    { X86::SBB32rr, X86::SBB32rm },
-    { X86::SBB64rr, X86::SBB64rm },
-    { X86::SHUFPDrri, X86::SHUFPDrmi },
-    { X86::SHUFPSrri, X86::SHUFPSrmi },
-    { X86::SUB16rr, X86::SUB16rm },
-    { X86::SUB32rr, X86::SUB32rm },
-    { X86::SUB64rr, X86::SUB64rm },
-    { X86::SUB8rr, X86::SUB8rm },
-    { X86::SUBPDrr, X86::SUBPDrm },
-    { X86::SUBPSrr, X86::SUBPSrm },
-    { X86::SUBSDrr, X86::SUBSDrm },
-    { X86::SUBSSrr, X86::SUBSSrm },
+  static const unsigned OpTbl2[][3] = {
+    { X86::ADC32rr, X86::ADC32rm, 0 },
+    { X86::ADC64rr, X86::ADC64rm, 0 },
+    { X86::ADD16rr, X86::ADD16rm, 0 },
+    { X86::ADD32rr, X86::ADD32rm, 0 },
+    { X86::ADD64rr, X86::ADD64rm, 0 },
+    { X86::ADD8rr, X86::ADD8rm, 0 },
+    { X86::ADDPDrr, X86::ADDPDrm, 16 },
+    { X86::ADDPSrr, X86::ADDPSrm, 16 },
+    { X86::ADDSDrr, X86::ADDSDrm, 0 },
+    { X86::ADDSSrr, X86::ADDSSrm, 0 },
+    { X86::ADDSUBPDrr, X86::ADDSUBPDrm, 16 },
+    { X86::ADDSUBPSrr, X86::ADDSUBPSrm, 16 },
+    { X86::AND16rr, X86::AND16rm, 0 },
+    { X86::AND32rr, X86::AND32rm, 0 },
+    { X86::AND64rr, X86::AND64rm, 0 },
+    { X86::AND8rr, X86::AND8rm, 0 },
+    { X86::ANDNPDrr, X86::ANDNPDrm, 16 },
+    { X86::ANDNPSrr, X86::ANDNPSrm, 16 },
+    { X86::ANDPDrr, X86::ANDPDrm, 16 },
+    { X86::ANDPSrr, X86::ANDPSrm, 16 },
+    { X86::CMOVA16rr, X86::CMOVA16rm, 0 },
+    { X86::CMOVA32rr, X86::CMOVA32rm, 0 },
+    { X86::CMOVA64rr, X86::CMOVA64rm, 0 },
+    { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 },
+    { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 },
+    { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 },
+    { X86::CMOVB16rr, X86::CMOVB16rm, 0 },
+    { X86::CMOVB32rr, X86::CMOVB32rm, 0 },
+    { X86::CMOVB64rr, X86::CMOVB64rm, 0 },
+    { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 },
+    { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 },
+    { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 },
+    { X86::CMOVE16rr, X86::CMOVE16rm, 0 },
+    { X86::CMOVE32rr, X86::CMOVE32rm, 0 },
+    { X86::CMOVE64rr, X86::CMOVE64rm, 0 },
+    { X86::CMOVG16rr, X86::CMOVG16rm, 0 },
+    { X86::CMOVG32rr, X86::CMOVG32rm, 0 },
+    { X86::CMOVG64rr, X86::CMOVG64rm, 0 },
+    { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 },
+    { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 },
+    { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 },
+    { X86::CMOVL16rr, X86::CMOVL16rm, 0 },
+    { X86::CMOVL32rr, X86::CMOVL32rm, 0 },
+    { X86::CMOVL64rr, X86::CMOVL64rm, 0 },
+    { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 },
+    { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 },
+    { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 },
+    { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 },
+    { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 },
+    { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 },
+    { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 },
+    { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 },
+    { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 },
+    { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 },
+    { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 },
+    { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 },
+    { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 },
+    { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 },
+    { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 },
+    { X86::CMOVO16rr, X86::CMOVO16rm, 0 },
+    { X86::CMOVO32rr, X86::CMOVO32rm, 0 },
+    { X86::CMOVO64rr, X86::CMOVO64rm, 0 },
+    { X86::CMOVP16rr, X86::CMOVP16rm, 0 },
+    { X86::CMOVP32rr, X86::CMOVP32rm, 0 },
+    { X86::CMOVP64rr, X86::CMOVP64rm, 0 },
+    { X86::CMOVS16rr, X86::CMOVS16rm, 0 },
+    { X86::CMOVS32rr, X86::CMOVS32rm, 0 },
+    { X86::CMOVS64rr, X86::CMOVS64rm, 0 },
+    { X86::CMPPDrri, X86::CMPPDrmi, 16 },
+    { X86::CMPPSrri, X86::CMPPSrmi, 16 },
+    { X86::CMPSDrr, X86::CMPSDrm, 0 },
+    { X86::CMPSSrr, X86::CMPSSrm, 0 },
+    { X86::DIVPDrr, X86::DIVPDrm, 16 },
+    { X86::DIVPSrr, X86::DIVPSrm, 16 },
+    { X86::DIVSDrr, X86::DIVSDrm, 0 },
+    { X86::DIVSSrr, X86::DIVSSrm, 0 },
+    { X86::FsANDNPDrr, X86::FsANDNPDrm, 16 },
+    { X86::FsANDNPSrr, X86::FsANDNPSrm, 16 },
+    { X86::FsANDPDrr, X86::FsANDPDrm, 16 },
+    { X86::FsANDPSrr, X86::FsANDPSrm, 16 },
+    { X86::FsORPDrr, X86::FsORPDrm, 16 },
+    { X86::FsORPSrr, X86::FsORPSrm, 16 },
+    { X86::FsXORPDrr, X86::FsXORPDrm, 16 },
+    { X86::FsXORPSrr, X86::FsXORPSrm, 16 },
+    { X86::HADDPDrr, X86::HADDPDrm, 16 },
+    { X86::HADDPSrr, X86::HADDPSrm, 16 },
+    { X86::HSUBPDrr, X86::HSUBPDrm, 16 },
+    { X86::HSUBPSrr, X86::HSUBPSrm, 16 },
+    { X86::IMUL16rr, X86::IMUL16rm, 0 },
+    { X86::IMUL32rr, X86::IMUL32rm, 0 },
+    { X86::IMUL64rr, X86::IMUL64rm, 0 },
+    { X86::MAXPDrr, X86::MAXPDrm, 16 },
+    { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 },
+    { X86::MAXPSrr, X86::MAXPSrm, 16 },
+    { X86::MAXPSrr_Int, X86::MAXPSrm_Int, 16 },
+    { X86::MAXSDrr, X86::MAXSDrm, 0 },
+    { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 },
+    { X86::MAXSSrr, X86::MAXSSrm, 0 },
+    { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 },
+    { X86::MINPDrr, X86::MINPDrm, 16 },
+    { X86::MINPDrr_Int, X86::MINPDrm_Int, 16 },
+    { X86::MINPSrr, X86::MINPSrm, 16 },
+    { X86::MINPSrr_Int, X86::MINPSrm_Int, 16 },
+    { X86::MINSDrr, X86::MINSDrm, 0 },
+    { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
+    { X86::MINSSrr, X86::MINSSrm, 0 },
+    { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
+    { X86::MULPDrr, X86::MULPDrm, 16 },
+    { X86::MULPSrr, X86::MULPSrm, 16 },
+    { X86::MULSDrr, X86::MULSDrm, 0 },
+    { X86::MULSSrr, X86::MULSSrm, 0 },
+    { X86::OR16rr, X86::OR16rm, 0 },
+    { X86::OR32rr, X86::OR32rm, 0 },
+    { X86::OR64rr, X86::OR64rm, 0 },
+    { X86::OR8rr, X86::OR8rm, 0 },
+    { X86::ORPDrr, X86::ORPDrm, 16 },
+    { X86::ORPSrr, X86::ORPSrm, 16 },
+    { X86::PACKSSDWrr, X86::PACKSSDWrm, 16 },
+    { X86::PACKSSWBrr, X86::PACKSSWBrm, 16 },
+    { X86::PACKUSWBrr, X86::PACKUSWBrm, 16 },
+    { X86::PADDBrr, X86::PADDBrm, 16 },
+    { X86::PADDDrr, X86::PADDDrm, 16 },
+    { X86::PADDQrr, X86::PADDQrm, 16 },
+    { X86::PADDSBrr, X86::PADDSBrm, 16 },
+    { X86::PADDSWrr, X86::PADDSWrm, 16 },
+    { X86::PADDWrr, X86::PADDWrm, 16 },
+    { X86::PANDNrr, X86::PANDNrm, 16 },
+    { X86::PANDrr, X86::PANDrm, 16 },
+    { X86::PAVGBrr, X86::PAVGBrm, 16 },
+    { X86::PAVGWrr, X86::PAVGWrm, 16 },
+    { X86::PCMPEQBrr, X86::PCMPEQBrm, 16 },
+    { X86::PCMPEQDrr, X86::PCMPEQDrm, 16 },
+    { X86::PCMPEQWrr, X86::PCMPEQWrm, 16 },
+    { X86::PCMPGTBrr, X86::PCMPGTBrm, 16 },
+    { X86::PCMPGTDrr, X86::PCMPGTDrm, 16 },
+    { X86::PCMPGTWrr, X86::PCMPGTWrm, 16 },
+    { X86::PINSRWrri, X86::PINSRWrmi, 16 },
+    { X86::PMADDWDrr, X86::PMADDWDrm, 16 },
+    { X86::PMAXSWrr, X86::PMAXSWrm, 16 },
+    { X86::PMAXUBrr, X86::PMAXUBrm, 16 },
+    { X86::PMINSWrr, X86::PMINSWrm, 16 },
+    { X86::PMINUBrr, X86::PMINUBrm, 16 },
+    { X86::PMULDQrr, X86::PMULDQrm, 16 },
+    { X86::PMULHUWrr, X86::PMULHUWrm, 16 },
+    { X86::PMULHWrr, X86::PMULHWrm, 16 },
+    { X86::PMULLDrr, X86::PMULLDrm, 16 },
+    { X86::PMULLDrr_int, X86::PMULLDrm_int, 16 },
+    { X86::PMULLWrr, X86::PMULLWrm, 16 },
+    { X86::PMULUDQrr, X86::PMULUDQrm, 16 },
+    { X86::PORrr, X86::PORrm, 16 },
+    { X86::PSADBWrr, X86::PSADBWrm, 16 },
+    { X86::PSLLDrr, X86::PSLLDrm, 16 },
+    { X86::PSLLQrr, X86::PSLLQrm, 16 },
+    { X86::PSLLWrr, X86::PSLLWrm, 16 },
+    { X86::PSRADrr, X86::PSRADrm, 16 },
+    { X86::PSRAWrr, X86::PSRAWrm, 16 },
+    { X86::PSRLDrr, X86::PSRLDrm, 16 },
+    { X86::PSRLQrr, X86::PSRLQrm, 16 },
+    { X86::PSRLWrr, X86::PSRLWrm, 16 },
+    { X86::PSUBBrr, X86::PSUBBrm, 16 },
+    { X86::PSUBDrr, X86::PSUBDrm, 16 },
+    { X86::PSUBSBrr, X86::PSUBSBrm, 16 },
+    { X86::PSUBSWrr, X86::PSUBSWrm, 16 },
+    { X86::PSUBWrr, X86::PSUBWrm, 16 },
+    { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, 16 },
+    { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, 16 },
+    { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, 16 },
+    { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, 16 },
+    { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, 16 },
+    { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, 16 },
+    { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, 16 },
+    { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, 16 },
+    { X86::PXORrr, X86::PXORrm, 16 },
+    { X86::SBB32rr, X86::SBB32rm, 0 },
+    { X86::SBB64rr, X86::SBB64rm, 0 },
+    { X86::SHUFPDrri, X86::SHUFPDrmi, 16 },
+    { X86::SHUFPSrri, X86::SHUFPSrmi, 16 },
+    { X86::SUB16rr, X86::SUB16rm, 0 },
+    { X86::SUB32rr, X86::SUB32rm, 0 },
+    { X86::SUB64rr, X86::SUB64rm, 0 },
+    { X86::SUB8rr, X86::SUB8rm, 0 },
+    { X86::SUBPDrr, X86::SUBPDrm, 16 },
+    { X86::SUBPSrr, X86::SUBPSrm, 16 },
+    { X86::SUBSDrr, X86::SUBSDrm, 0 },
+    { X86::SUBSSrr, X86::SUBSSrm, 0 },
     // FIXME: TEST*rr -> swapped operand of TEST*mr.
-    { X86::UNPCKHPDrr, X86::UNPCKHPDrm },
-    { X86::UNPCKHPSrr, X86::UNPCKHPSrm },
-    { X86::UNPCKLPDrr, X86::UNPCKLPDrm },
-    { X86::UNPCKLPSrr, X86::UNPCKLPSrm },
-    { X86::XOR16rr, X86::XOR16rm },
-    { X86::XOR32rr, X86::XOR32rm },
-    { X86::XOR64rr, X86::XOR64rm },
-    { X86::XOR8rr, X86::XOR8rm },
-    { X86::XORPDrr, X86::XORPDrm },
-    { X86::XORPSrr, X86::XORPSrm }
+    { X86::UNPCKHPDrr, X86::UNPCKHPDrm, 16 },
+    { X86::UNPCKHPSrr, X86::UNPCKHPSrm, 16 },
+    { X86::UNPCKLPDrr, X86::UNPCKLPDrm, 16 },
+    { X86::UNPCKLPSrr, X86::UNPCKLPSrm, 16 },
+    { X86::XOR16rr, X86::XOR16rm, 0 },
+    { X86::XOR32rr, X86::XOR32rm, 0 },
+    { X86::XOR64rr, X86::XOR64rm, 0 },
+    { X86::XOR8rr, X86::XOR8rm, 0 },
+    { X86::XORPDrr, X86::XORPDrm, 16 },
+    { X86::XORPSrr, X86::XORPSrm, 16 }
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
     unsigned RegOp = OpTbl2[i][0];
     unsigned MemOp = OpTbl2[i][1];
+    unsigned Align = OpTbl2[i][2];
     if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp,
-                                                 MemOp)).second)
+                                                 std::make_pair(MemOp,Align))).second)
       assert(false && "Duplicated entries?");
-    unsigned AuxInfo = 2 | (1 << 4); // Index 2, folded load
+    // Index 2, folded load
+    unsigned AuxInfo = 2 | (1 << 4);
     if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                                 std::make_pair(RegOp, AuxInfo))).second)
       AmbEntries.push_back(MemOp);
@@ -760,7 +766,6 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
   return 0;
 }
-
 /// regIsPICBase - Return true if register is PIC base (i.e.g defined by
 /// X86::MOVPC32r.
 static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
@@ -776,37 +781,9 @@ static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
   return isPICBase;
 }
 
-/// isGVStub - Return true if the GV requires an extra load to get the
-/// real address.
-static inline bool isGVStub(GlobalValue *GV, X86TargetMachine &TM) {
-  return TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad(GV, TM, false);
-}
-
-/// CanRematLoadWithDispOperand - Return true if a load with the specified
-/// operand is a candidate for remat: for this to be true we need to know that
-/// the load will always return the same value, even if moved.
-static bool CanRematLoadWithDispOperand(const MachineOperand &MO,
-                                        X86TargetMachine &TM) {
-  // Loads from constant pool entries can be remat'd.
-  if (MO.isCPI()) return true;
-
-  // We can remat globals in some cases.
-  if (MO.isGlobal()) {
-    // If this is a load of a stub, not of the global, we can remat it. This
-    // access will always return the address of the global.
-    if (isGVStub(MO.getGlobal(), TM))
-      return true;
-
-    // If the global itself is constant, we can remat the load.
-    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal()))
-      if (GV->isConstant())
-        return true;
-  }
-  return false;
-}
-
 bool
-X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
+X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
+                                                AliasAnalysis *AA) const {
   switch (MI->getOpcode()) {
   default: break;
   case X86::MOV8rm:
@@ -825,7 +802,7 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
       if (MI->getOperand(1).isReg() &&
          MI->getOperand(2).isImm() &&
          MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
-          CanRematLoadWithDispOperand(MI->getOperand(4), TM)) {
+          MI->isInvariantLoad(AA)) {
        unsigned BaseReg = MI->getOperand(1).getReg();
        if (BaseReg == 0 || BaseReg == X86::RIP)
          return true;
@@ -876,7 +853,7 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
 /// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction that
 /// would clobber the EFLAGS condition register. Note the result may be
 /// conservative. If it cannot definitely determine the safety after visiting
-/// two instructions it assumes it's not safe.
+/// a few instructions in each direction it assumes it's not safe.
 static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I) {
   // It's always safe to clobber EFLAGS at the end of a block.
@@ -884,11 +861,13 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
     return true;
 
   // For compile time consideration, if we are not able to determine the
-  // safety after visiting 2 instructions, we will assume it's not safe.
-  for (unsigned i = 0; i < 2; ++i) {
+  // safety after visiting 4 instructions in each direction, we will assume
+  // it's not safe.
+  MachineBasicBlock::iterator Iter = I;
+  for (unsigned i = 0; i < 4; ++i) {
     bool SeenDef = false;
-    for (unsigned j = 0, e = I->getNumOperands(); j != e; ++j) {
-      MachineOperand &MO = I->getOperand(j);
+    for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
+      MachineOperand &MO = Iter->getOperand(j);
       if (!MO.isReg())
         continue;
       if (MO.getReg() == X86::EFLAGS) {
@@ -901,10 +880,33 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
     if (SeenDef)
       // This instruction defines EFLAGS, no need to look any further.
       return true;
-    ++I;
+    ++Iter;
     // If we make it to the end of the block, it's safe to clobber EFLAGS.
-    if (I == MBB.end())
+    if (Iter == MBB.end())
+      return true;
+  }
+
+  Iter = I;
+  for (unsigned i = 0; i < 4; ++i) {
+    // If we make it to the beginning of the block, it's safe to clobber
+    // EFLAGS iff EFLAGS is not live-in.
+    if (Iter == MBB.begin())
+      return !MBB.isLiveIn(X86::EFLAGS);
+
+    --Iter;
+    bool SawKill = false;
+    for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
+      MachineOperand &MO = Iter->getOperand(j);
+      if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
+        if (MO.isDef()) return MO.isDead();
+        if (MO.isKill()) SawKill = true;
+      }
+    }
+
+    if (SawKill)
+      // This instruction kills EFLAGS and doesn't redefine it, so
+      // there's no need to look further.
       return true;
   }
 
@@ -914,14 +916,11 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
 void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I,
-                                 unsigned DestReg,
+                                 unsigned DestReg, unsigned SubIdx,
                                  const MachineInstr *Orig) const {
   DebugLoc DL = DebugLoc::getUnknownLoc();
   if (I != MBB.end()) DL = I->getDebugLoc();
 
-  unsigned SubIdx = Orig->getOperand(0).isReg()
-    ? Orig->getOperand(0).getSubReg() : 0;
-  bool ChangeSubIdx = SubIdx != 0;
   if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
     DestReg = RI.getSubReg(DestReg, SubIdx);
     SubIdx = 0;
@@ -929,76 +928,36 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
   // MOV32r0 etc. are implemented with xor which clobbers condition code.
   // Re-materialize them as movri instructions to avoid side effects.
-  bool Emitted = false;
-  switch (Orig->getOpcode()) {
+  bool Clone = true;
+  unsigned Opc = Orig->getOpcode();
+  switch (Opc) {
   default: break;
   case X86::MOV8r0:
   case X86::MOV16r0:
-  case X86::MOV32r0:
-  case X86::MOV64r0: {
+  case X86::MOV32r0: {
    if (!isSafeToClobberEFLAGS(MBB, I)) {
-      unsigned Opc = 0;
-      switch (Orig->getOpcode()) {
+      switch (Opc) {
      default: break;
      case X86::MOV8r0:  Opc = X86::MOV8ri;  break;
      case X86::MOV16r0: Opc = X86::MOV16ri; break;
      case X86::MOV32r0: Opc = X86::MOV32ri; break;
-      case X86::MOV64r0: Opc = X86::MOV64ri32; break;
      }
-      BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0);
-      Emitted = true;
+      Clone = false;
    }
    break;
   }
   }
 
-  if (!Emitted) {
+  if (Clone) {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
    MI->getOperand(0).setReg(DestReg);
    MBB.insert(I, MI);
+  } else {
+    BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0);
   }
 
-  if (ChangeSubIdx) {
-    MachineInstr *NewMI = prior(I);
-    NewMI->getOperand(0).setSubReg(SubIdx);
-  }
-}
-
-/// isInvariantLoad - Return true if the specified instruction (which is marked
-/// mayLoad) is loading from a location whose value is invariant across the
-/// function.  For example, loading a value from the constant pool or from
-/// from the argument area of a function if it does not change.  This should
-/// only return true of *all* loads the instruction does are invariant (if it
-/// does multiple loads).
-bool X86InstrInfo::isInvariantLoad(const MachineInstr *MI) const {
-  // This code cares about loads from three cases: constant pool entries,
-  // invariant argument slots, and global stubs. In order to handle these cases
-  // for all of the myriad of X86 instructions, we just scan for a CP/FI/GV
-  // operand and base our analysis on it. This is safe because the address of
-  // none of these three cases is ever used as anything other than a load base
-  // and X86 doesn't have any instructions that load from multiple places.
-
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
-    // Loads from constant pools are trivially invariant.
-    if (MO.isCPI())
-      return true;
-
-    if (MO.isGlobal())
-      return isGVStub(MO.getGlobal(), TM);
-
-    // If this is a load from an invariant stack slot, the load is a constant.
-    if (MO.isFI()) {
-      const MachineFrameInfo &MFI =
-        *MI->getParent()->getParent()->getFrameInfo();
-      int Idx = MO.getIndex();
-      return MFI.isFixedObjectIndex(Idx) && MFI.isImmutableObjectIndex(Idx);
-    }
-  }
-
-  // All other instances of these instructions are presumed to have other
-  // issues.
-  return false;
+  MachineInstr *NewMI = prior(I);
+  NewMI->getOperand(0).setSubReg(SubIdx);
 }
 
 /// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
@@ -1304,7 +1263,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
     unsigned Opc;
     unsigned Size;
     switch (MI->getOpcode()) {
-    default: assert(0 && "Unreachable!");
+    default: llvm_unreachable("Unreachable!");
     case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
     case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
     case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
@@ -1459,7 +1418,7 @@ static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
 
 unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
   switch (CC) {
-  default: assert(0 && "Illegal condition code!");
+  default: llvm_unreachable("Illegal condition code!");
   case X86::COND_E:  return X86::JE;
   case X86::COND_NE: return X86::JNE;
   case X86::COND_L:  return X86::JL;
@@ -1483,7 +1442,7 @@ unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
 /// e.g. turning COND_E to COND_NE.
 X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
   switch (CC) {
-  default: assert(0 && "Illegal condition code!");
+  default: llvm_unreachable("Illegal condition code!");
   case X86::COND_E:  return X86::COND_NE;
   case X86::COND_NE: return X86::COND_E;
   case X86::COND_L:  return X86::COND_GE;
@@ -1699,14 +1658,26 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
     /* Source and destination have the same register class. */;
   else if (CommonRC->hasSuperClass(SrcRC))
     CommonRC = SrcRC;
-  else if (!DestRC->hasSubClass(SrcRC))
-    CommonRC = 0;
+  else if (!DestRC->hasSubClass(SrcRC)) {
+    // Neither of GR64_NOREX or GR64_NOSP is a superclass of the other,
+    // but we want to copy then as GR64. Similarly, for GR32_NOREX and
+    // GR32_NOSP, copy as GR32.
+    if (SrcRC->hasSuperClass(&X86::GR64RegClass) &&
+        DestRC->hasSuperClass(&X86::GR64RegClass))
+      CommonRC = &X86::GR64RegClass;
+    else if (SrcRC->hasSuperClass(&X86::GR32RegClass) &&
+             DestRC->hasSuperClass(&X86::GR32RegClass))
+      CommonRC = &X86::GR32RegClass;
+    else
+      CommonRC = 0;
+  }
 
   if (CommonRC) {
     unsigned Opc;
-    if (CommonRC == &X86::GR64RegClass) {
+    if (CommonRC == &X86::GR64RegClass || CommonRC == &X86::GR64_NOSPRegClass) {
       Opc = X86::MOV64rr;
-    } else if (CommonRC == &X86::GR32RegClass) {
+    } else if (CommonRC == &X86::GR32RegClass ||
+               CommonRC == &X86::GR32_NOSPRegClass) {
       Opc = X86::MOV32rr;
     } else if (CommonRC == &X86::GR16RegClass) {
       Opc = X86::MOV16rr;
@@ -1731,7 +1702,8 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
         Opc = X86::MOV8rr_NOREX;
       else
         Opc = X86::MOV8rr;
-    } else if (CommonRC == &X86::GR64_NOREXRegClass) {
+    } else if (CommonRC == &X86::GR64_NOREXRegClass ||
+               CommonRC == &X86::GR64_NOREX_NOSPRegClass) {
      Opc = X86::MOV64rr;
    } else if (CommonRC == &X86::GR32_NOREXRegClass) {
      Opc = X86::MOV32rr;
@@ -1759,16 +1731,17 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
     BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg);
     return true;
   }
-  
+
   // Moving EFLAGS to / from another register requires a push and a pop.
   if (SrcRC == &X86::CCRRegClass) {
     if (SrcReg != X86::EFLAGS)
       return false;
-    if (DestRC == &X86::GR64RegClass) {
+    if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
      BuildMI(MBB, MI, DL, get(X86::PUSHFQ));
      BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
      return true;
-    } else if (DestRC == &X86::GR32RegClass) {
+    } else if (DestRC == &X86::GR32RegClass ||
+               DestRC == &X86::GR32_NOSPRegClass) {
      BuildMI(MBB, MI, DL, get(X86::PUSHFD));
      BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
      return true;
@@ -1776,11 +1749,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
   } else if (DestRC == &X86::CCRRegClass) {
     if (DestReg != X86::EFLAGS)
       return false;
-    if (SrcRC == &X86::GR64RegClass) {
+    if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
       BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg);
       BuildMI(MBB, MI, DL, get(X86::POPFQ));
       return true;
-    } else if (SrcRC == &X86::GR32RegClass) {
+    } else if (SrcRC == &X86::GR32RegClass ||
+               DestRC == &X86::GR32_NOSPRegClass) {
       BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg);
       BuildMI(MBB, MI, DL, get(X86::POPFD));
       return true;
@@ -1838,9 +1812,9 @@ static unsigned getStoreRegOpcode(unsigned SrcReg,
                                   bool isStackAligned,
                                   TargetMachine &TM) {
   unsigned Opc = 0;
-  if (RC == &X86::GR64RegClass) {
+  if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) {
     Opc = X86::MOV64mr;
-  } else if (RC == &X86::GR32RegClass) {
+  } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) {
     Opc = X86::MOV32mr;
   } else if (RC == &X86::GR16RegClass) {
     Opc = X86::MOV16mr;
@@ -1865,7 +1839,8 @@ static unsigned getStoreRegOpcode(unsigned SrcReg,
       Opc = X86::MOV8mr_NOREX;
     else
       Opc = X86::MOV8mr;
-  } else if (RC == &X86::GR64_NOREXRegClass) {
+  } else if (RC == &X86::GR64_NOREXRegClass ||
+             RC == &X86::GR64_NOREX_NOSPRegClass) {
    Opc = X86::MOV64mr;
   } else if (RC == &X86::GR32_NOREXRegClass) {
    Opc = X86::MOV32mr;
@@ -1889,8 +1864,7 @@ static unsigned getStoreRegOpcode(unsigned SrcReg,
   } else if (RC == &X86::VR64RegClass) {
     Opc = X86::MMX_MOVQ64mr;
   } else {
-    assert(0 && "Unknown regclass");
-    abort();
+    llvm_unreachable("Unknown regclass");
   }
 
   return Opc;
@@ -1914,6 +1888,8 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
                                   bool isKill,
                                   SmallVectorImpl<MachineOperand> &Addr,
                                   const TargetRegisterClass *RC,
+                                  MachineInstr::mmo_iterator MMOBegin,
+                                  MachineInstr::mmo_iterator MMOEnd,
                                   SmallVectorImpl<MachineInstr*> &NewMIs) const {
   bool isAligned = (RI.getStackAlignment() >= 16) ||
                    RI.needsStackRealignment(MF);
@@ -1923,6 +1899,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
   for (unsigned i = 0, e = Addr.size(); i != e; ++i)
     MIB.addOperand(Addr[i]);
   MIB.addReg(SrcReg, getKillRegState(isKill));
+  (*MIB).setMemRefs(MMOBegin, MMOEnd);
   NewMIs.push_back(MIB);
 }
 
@@ -1931,9 +1908,9 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
                                  bool isStackAligned,
                                  const TargetMachine &TM) {
   unsigned Opc = 0;
-  if (RC == &X86::GR64RegClass) {
+  if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) {
    Opc = X86::MOV64rm;
-  } else if (RC == &X86::GR32RegClass) {
+  } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) {
    Opc = X86::MOV32rm;
   } else if (RC == &X86::GR16RegClass) {
    Opc = X86::MOV16rm;
@@ -1958,7 +1935,8 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
      Opc = X86::MOV8rm_NOREX;
    else
      Opc = X86::MOV8rm;
-  } else if (RC == &X86::GR64_NOREXRegClass) {
+  } else if (RC == &X86::GR64_NOREXRegClass ||
+             RC == &X86::GR64_NOREX_NOSPRegClass) {
    Opc = X86::MOV64rm;
   } else if (RC == &X86::GR32_NOREXRegClass) {
    Opc = X86::MOV32rm;
@@ -1982,8 +1960,7 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
   } else if (RC == &X86::VR64RegClass) {
     Opc = X86::MMX_MOVQ64rm;
   } else {
-    assert(0 && "Unknown regclass");
-    abort();
+    llvm_unreachable("Unknown regclass");
   }
 
   return Opc;
@@ -2005,6 +1982,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
 void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
                                    SmallVectorImpl<MachineOperand> &Addr,
                                    const TargetRegisterClass *RC,
+                                   MachineInstr::mmo_iterator MMOBegin,
+                                   MachineInstr::mmo_iterator MMOEnd,
                                    SmallVectorImpl<MachineInstr*> &NewMIs) const {
   bool isAligned = (RI.getStackAlignment() >= 16) ||
                    RI.needsStackRealignment(MF);
@@ -2013,6 +1992,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
   MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
   for (unsigned i = 0, e = Addr.size(); i != e; ++i)
     MIB.addOperand(Addr[i]);
+  (*MIB).setMemRefs(MMOBegin, MMOEnd);
   NewMIs.push_back(MIB);
 }
 
@@ -2026,9 +2006,11 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
   if (MI != MBB.end()) DL = MI->getDebugLoc();
 
   bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+  bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
   unsigned SlotSize = is64Bit ? 8 : 4;
 
   MachineFunction &MF = *MBB.getParent();
+  unsigned FPReg = RI.getFrameRegister(MF);
   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
   unsigned CalleeFrameSize = 0;
 
@@ -2038,10 +2020,12 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
     const TargetRegisterClass *RegClass = CSI[i-1].getRegClass();
     // Add the callee-saved register as live-in. It's killed at the spill.
     MBB.addLiveIn(Reg);
-    if (RegClass != &X86::VR128RegClass) {
+    if (Reg == FPReg)
+      // X86RegisterInfo::emitPrologue will handle spilling of frame register.
+      continue;
+    if (RegClass != &X86::VR128RegClass && !isWin64) {
       CalleeFrameSize += SlotSize;
-      BuildMI(MBB, MI, DL, get(Opc))
-        .addReg(Reg, RegState::Kill);
+      BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
     } else {
       storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass);
     }
@@ -2060,13 +2044,18 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
   DebugLoc DL = DebugLoc::getUnknownLoc();
   if (MI != MBB.end()) DL = MI->getDebugLoc();
 
+  MachineFunction &MF = *MBB.getParent();
+  unsigned FPReg = RI.getFrameRegister(MF);
   bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
-
+  bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
   unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r;
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
+    if (Reg == FPReg)
+      // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
+      continue;
     const TargetRegisterClass *RegClass = CSI[i].getRegClass();
-    if (RegClass != &X86::VR128RegClass) {
+    if (RegClass != &X86::VR128RegClass && !isWin64) {
       BuildMI(MBB, MI, DL, get(Opc), Reg);
     } else {
       loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
@@ -2143,8 +2132,9 @@ static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
 
 MachineInstr*
 X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                     MachineInstr *MI, unsigned i,
-                                    const SmallVectorImpl<MachineOperand> &MOs) const{
-  const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
+                                    const SmallVectorImpl<MachineOperand> &MOs,
+                                    unsigned Size, unsigned Align) const {
+  const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
   bool isTwoAddrFold = false;
   unsigned NumOps = MI->getDesc().getNumOperands();
   bool isTwoAddr = NumOps > 1 &&
@@ -2165,8 +2155,6 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
       NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
     else if (MI->getOpcode() == X86::MOV32r0)
       NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
-    else if (MI->getOpcode() == X86::MOV64r0)
-      NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
     else if (MI->getOpcode() == X86::MOV8r0)
       NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
     if (NewMI)
@@ -2182,60 +2170,82 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   // If table selected...
   if (OpcodeTablePtr) {
     // Find the Opcode to fuse
-    DenseMap<unsigned*, unsigned>::iterator I =
+    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
       OpcodeTablePtr->find((unsigned*)MI->getOpcode());
     if (I != OpcodeTablePtr->end()) {
+      unsigned Opcode = I->second.first;
+      unsigned MinAlign = I->second.second;
+      if (Align < MinAlign)
+        return NULL;
+      bool NarrowToMOV32rm = false;
+      if (Size) {
+        unsigned RCSize =  MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize();
+        if (Size < RCSize) {
+          // Check if it's safe to fold the load. If the size of the object is
+          // narrower than the load width, then it's not.
+          if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
+            return NULL;
+          // If this is a 64-bit load, but the spill slot is 32, then we can do
+          // a 32-bit load which is implicitly zero-extended. This likely is due
+          // to liveintervalanalysis remat'ing a load from stack slot.
+          if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
+            return NULL;
+          Opcode = X86::MOV32rm;
+          NarrowToMOV32rm = true;
+        }
+      }
+
       if (isTwoAddrFold)
-        NewMI = FuseTwoAddrInst(MF, I->second, MOs, MI, *this);
+        NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this);
       else
-        NewMI = FuseInst(MF, I->second, i, MOs, MI, *this);
+        NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this);
+
+      if (NarrowToMOV32rm) {
+        // If this is the special case where we use a MOV32rm to load a 32-bit
+        // value and zero-extend the top bits. Change the destination register
+        // to a 32-bit one.
+        unsigned DstReg = NewMI->getOperand(0).getReg();
+        if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+          NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
+                                                   4/*x86_subreg_32bit*/));
+        else
+          NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/);
+      }
       return NewMI;
     }
   }
 
   // No fusion
   if (PrintFailedFusing)
-    cerr << "We failed to fuse operand " << i << " in " << *MI;
+    errs() << "We failed to fuse operand " << i << " in " << *MI;
   return NULL;
 }
 
 
 MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                                   MachineInstr *MI,
-                                                  const SmallVectorImpl<unsigned> &Ops,
+                                                  const SmallVectorImpl<unsigned> &Ops,
                                                   int FrameIndex) const {
   // Check switch flag
   if (NoFusing) return NULL;
 
   const MachineFrameInfo *MFI = MF.getFrameInfo();
+  unsigned Size = MFI->getObjectSize(FrameIndex);
   unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
-  // FIXME: Move alignment requirement into tables?
-  if (Alignment < 16) {
-    switch (MI->getOpcode()) {
-    default: break;
-    // Not always safe to fold movsd into these instructions since their load
-    // folding variants expects the address to be 16 byte aligned.
-    case X86::FsANDNPDrr:
-    case X86::FsANDNPSrr:
-    case X86::FsANDPDrr:
-    case X86::FsANDPSrr:
-    case X86::FsORPDrr:
-    case X86::FsORPSrr:
-    case X86::FsXORPDrr:
-    case X86::FsXORPSrr:
-      return NULL;
-    }
-  }
-
   if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
     unsigned NewOpc = 0;
+    unsigned RCSize = 0;
     switch (MI->getOpcode()) {
     default: return NULL;
-    case X86::TEST8rr:  NewOpc = X86::CMP8ri; break;
-    case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
-    case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
-    case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+    case X86::TEST8rr:  NewOpc = X86::CMP8ri; RCSize = 1; break;
+    case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break;
+    case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break;
+    case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break;
     }
+    // Check if it's safe to fold the load. If the size of the object is
+    // narrower than the load width, then it's not.
+    if (Size < RCSize)
+      return NULL;
     // Change to CMPXXri r, 0 first.
     MI->setDesc(get(NewOpc));
     MI->getOperand(1).ChangeToImmediate(0);
@@ -2244,12 +2254,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
 
   SmallVector<MachineOperand,4> MOs;
   MOs.push_back(MachineOperand::CreateFI(FrameIndex));
-  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs);
+  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
 }
 
 MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                                   MachineInstr *MI,
-                                                  const SmallVectorImpl<unsigned> &Ops,
+                                                  const SmallVectorImpl<unsigned> &Ops,
                                                   MachineInstr *LoadMI) const {
   // Check switch flag
   if (NoFusing) return NULL;
 
@@ -2257,26 +2267,22 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   // Determine the alignment of the load.
   unsigned Alignment = 0;
   if (LoadMI->hasOneMemOperand())
-    Alignment = LoadMI->memoperands_begin()->getAlignment();
-
-  // FIXME: Move alignment requirement into tables?
-  if (Alignment < 16) {
-    switch (MI->getOpcode()) {
-    default: break;
-    // Not always safe to fold movsd into these instructions since their load
-    // folding variants expects the address to be 16 byte aligned.
-    case X86::FsANDNPDrr:
-    case X86::FsANDNPSrr:
-    case X86::FsANDPDrr:
-    case X86::FsANDPSrr:
-    case X86::FsORPDrr:
-    case X86::FsORPSrr:
-    case X86::FsXORPDrr:
-    case X86::FsXORPSrr:
-      return NULL;
+    Alignment = (*LoadMI->memoperands_begin())->getAlignment();
+  else
+    switch (LoadMI->getOpcode()) {
+    case X86::V_SET0:
+    case X86::V_SETALLONES:
+      Alignment = 16;
+      break;
+    case X86::FsFLD0SD:
+      Alignment = 8;
+      break;
+    case X86::FsFLD0SS:
+      Alignment = 4;
+      break;
+    default:
+      llvm_unreachable("Don't know how to fold this instruction!");
     }
-  }
-
   if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
     unsigned NewOpc = 0;
     switch (MI->getOpcode()) {
@@ -2293,28 +2299,40 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
       return NULL;
 
   SmallVector<MachineOperand,X86AddrNumOperands> MOs;
-  if (LoadMI->getOpcode() == X86::V_SET0 ||
-      LoadMI->getOpcode() == X86::V_SETALLONES) {
+  switch (LoadMI->getOpcode()) {
+  case X86::V_SET0:
+  case X86::V_SETALLONES:
+  case X86::FsFLD0SD:
+  case X86::FsFLD0SS: {
     // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
     // Create a constant-pool entry and operands to load from it.
 
     // x86-32 PIC requires a PIC base register for constant pools.
     unsigned PICBase = 0;
-    if (TM.getRelocationModel() == Reloc::PIC_ &&
-        !TM.getSubtarget<X86Subtarget>().is64Bit())
-      // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF);
-      // This doesn't work for several reasons.
-      // 1. GlobalBaseReg may have been spilled.
-      // 2. It may not be live at MI.
-      return false;
+    if (TM.getRelocationModel() == Reloc::PIC_) {
+      if (TM.getSubtarget<X86Subtarget>().is64Bit())
+        PICBase = X86::RIP;
+      else
+        // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF);
+        // This doesn't work for several reasons.
+        // 1. GlobalBaseReg may have been spilled.
+        // 2. It may not be live at MI.
+        return NULL;
+    }
 
-    // Create a v4i32 constant-pool entry.
+    // Create a constant-pool entry.
     MachineConstantPool &MCP = *MF.getConstantPool();
-    const VectorType *Ty = VectorType::get(Type::Int32Ty, 4);
-    Constant *C = LoadMI->getOpcode() == X86::V_SET0 ?
-                    ConstantVector::getNullValue(Ty) :
-                    ConstantVector::getAllOnesValue(Ty);
-    unsigned CPI = MCP.getConstantPoolIndex(C, 16);
+    const Type *Ty;
+    if (LoadMI->getOpcode() == X86::FsFLD0SS)
+      Ty = Type::getFloatTy(MF.getFunction()->getContext());
+    else if (LoadMI->getOpcode() == X86::FsFLD0SD)
+      Ty = Type::getDoubleTy(MF.getFunction()->getContext());
+    else
+      Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
+    Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
+                    Constant::getAllOnesValue(Ty) :
+                    Constant::getNullValue(Ty);
+    unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
 
     // Create operands to load from the constant pool entry.
     MOs.push_back(MachineOperand::CreateReg(PICBase, false));
@@ -2322,13 +2340,17 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     MOs.push_back(MachineOperand::CreateReg(0, false));
     MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
     MOs.push_back(MachineOperand::CreateReg(0, false));
-  } else {
+    break;
+  }
+  default: {
     // Folding a normal load. Just copy the load's address operands.
     unsigned NumOps = LoadMI->getDesc().getNumOperands();
     for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i)
       MOs.push_back(LoadMI->getOperand(i));
+    break;
+  }
   }
-  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs);
+  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
 }
 
 
@@ -2360,15 +2382,14 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
   // Folding a memory location into the two-address part of a two-address
   // instruction is different than folding it other places.  It requires
   // replacing the *two* registers with the memory location.
-  const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
+  const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
   if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
     OpcodeTablePtr = &RegOp2MemOpTable2Addr;
   } else if (OpNum == 0) { // If operand 0
     switch (Opc) {
+    case X86::MOV8r0:
     case X86::MOV16r0:
     case X86::MOV32r0:
-    case X86::MOV64r0:
-    case X86::MOV8r0:
       return true;
     default: break;
     }
@@ -2381,7 +2402,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
 
   if (OpcodeTablePtr) {
     // Find the Opcode to fuse
-    DenseMap<unsigned*, unsigned>::iterator I =
+    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
       OpcodeTablePtr->find((unsigned*)Opc);
     if (I != OpcodeTablePtr->end())
       return true;
@@ -2410,8 +2431,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
 
   const TargetInstrDesc &TID = get(Opc);
   const TargetOperandInfo &TOI = TID.OpInfo[Index];
-  const TargetRegisterClass *RC = TOI.isLookupPtrRegClass()
-    ? RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass);
+  const TargetRegisterClass *RC = TOI.getRegClass(&RI);
   SmallVector<MachineOperand, X86AddrNumOperands> AddrOps;
   SmallVector<MachineOperand,2> BeforeOps;
   SmallVector<MachineOperand,2> AfterOps;
@@ -2430,7 +2450,11 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
 
   // Emit the load instruction.
   if (UnfoldLoad) {
-    loadRegFromAddr(MF, Reg, AddrOps, RC, NewMIs);
+    std::pair<MachineInstr::mmo_iterator,
+              MachineInstr::mmo_iterator> MMOs =
+      MF.extractLoadMemRefs(MI->memoperands_begin(),
+                            MI->memoperands_end());
+    loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
     if (UnfoldStore) {
       // Address operands cannot be marked isKill.
       for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) {
@@ -2489,10 +2513,12 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
 
   // Emit the store instruction.
   if (UnfoldStore) {
-    const TargetOperandInfo &DstTOI = TID.OpInfo[0];
-    const TargetRegisterClass *DstRC = DstTOI.isLookupPtrRegClass()
-      ? RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass);
-    storeRegToAddr(MF, Reg, true, AddrOps, DstRC, NewMIs);
+    const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI);
+    std::pair<MachineInstr::mmo_iterator,
+              MachineInstr::mmo_iterator> MMOs =
+      MF.extractStoreMemRefs(MI->memoperands_begin(),
+                             MI->memoperands_end());
+    storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs);
   }
 
   return true;
@@ -2513,9 +2539,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
   bool FoldedLoad = I->second.second & (1 << 4);
   bool FoldedStore = I->second.second & (1 << 5);
   const TargetInstrDesc &TID = get(Opc);
-  const TargetOperandInfo &TOI = TID.OpInfo[Index];
-  const TargetRegisterClass *RC = TOI.isLookupPtrRegClass()
-    ? RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass);
+  const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI);
   unsigned NumDefs = TID.NumDefs;
   std::vector<SDValue> AddrOps;
   std::vector<SDValue> BeforeOps;
@@ -2536,35 +2560,40 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
 
   // Emit the load instruction.
   SDNode *Load = 0;
-  const MachineFunction &MF = DAG.getMachineFunction();
+  MachineFunction &MF = DAG.getMachineFunction();
   if (FoldedLoad) {
-    MVT VT = *RC->vt_begin();
+    EVT VT = *RC->vt_begin();
     bool isAligned = (RI.getStackAlignment() >= 16) ||
                      RI.needsStackRealignment(MF);
-    Load = DAG.getTargetNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
-                             VT, MVT::Other, &AddrOps[0], AddrOps.size());
+    Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
+                              VT, MVT::Other, &AddrOps[0], AddrOps.size());
     NewNodes.push_back(Load);
+
+    // Preserve memory reference information.
+    std::pair<MachineInstr::mmo_iterator,
+              MachineInstr::mmo_iterator> MMOs =
+      MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+                            cast<MachineSDNode>(N)->memoperands_end());
+    cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
   }
 
   // Emit the data processing instruction.
-  std::vector<MVT> VTs;
+  std::vector<EVT> VTs;
   const TargetRegisterClass *DstRC = 0;
   if (TID.getNumDefs() > 0) {
-    const TargetOperandInfo &DstTOI = TID.OpInfo[0];
-    DstRC = DstTOI.isLookupPtrRegClass()
-      ? RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass);
+    DstRC = TID.OpInfo[0].getRegClass(&RI);
     VTs.push_back(*DstRC->vt_begin());
   }
   for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
-    MVT VT = N->getValueType(i);
+    EVT VT = N->getValueType(i);
     if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs())
       VTs.push_back(VT);
   }
   if (Load)
     BeforeOps.push_back(SDValue(Load, 0));
   std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
-  SDNode *NewNode= DAG.getTargetNode(Opc, dl, VTs, &BeforeOps[0],
-                                     BeforeOps.size());
+  SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0],
+                                      BeforeOps.size());
   NewNodes.push_back(NewNode);
 
   // Emit the store instruction.
@@ -2574,11 +2603,18 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
     AddrOps.push_back(Chain);
     bool isAligned = (RI.getStackAlignment() >= 16) ||
                      RI.needsStackRealignment(MF);
-    SDNode *Store = DAG.getTargetNode(getStoreRegOpcode(0, DstRC,
-                                                        isAligned, TM),
-                                      dl, MVT::Other,
-                                      &AddrOps[0], AddrOps.size());
+    SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
+                                                         isAligned, TM),
+                                       dl, MVT::Other,
+                                       &AddrOps[0], AddrOps.size());
     NewNodes.push_back(Store);
+
+    // Preserve memory reference information.
+    std::pair<MachineInstr::mmo_iterator,
+              MachineInstr::mmo_iterator> MMOs =
+      MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+                             cast<MachineSDNode>(N)->memoperands_end());
+    cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
   }
 
   return true;
@@ -2644,7 +2680,7 @@ unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) {
   case X86II::Imm16:  return 2;
   case X86II::Imm32:  return 4;
   case X86II::Imm64:  return 8;
-  default: assert(0 && "Immediate size not set!");
+  default: llvm_unreachable("Immediate size not set!");
     return 0;
   }
 }
@@ -2829,7 +2865,7 @@ static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) {
   } else if (RelocOp->isJTI()) {
     FinalSize += sizeJumpTableAddress(false);
   } else {
-    assert(0 && "Unknown value to relocate!");
+    llvm_unreachable("Unknown value to relocate!");
   }
   return FinalSize;
 }
@@ -2926,7 +2962,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
   case X86II::GS:
     ++FinalSize;
     break;
-  default: assert(0 && "Invalid segment!");
+  default: llvm_unreachable("Invalid segment!");
   case 0: break;  // No segment override!
   }
 
@@ -2946,6 +2982,10 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
   case X86II::TA:    // 0F 3A
     Need0FPrefix = true;
     break;
+  case X86II::TF: // F2 0F 38
+    ++FinalSize;
+    Need0FPrefix = true;
+    break;
   case X86II::REP: break; // already handled.
   case X86II::XS:   // F3 0F
     ++FinalSize;
@@ -2959,7 +2999,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
   case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
     ++FinalSize;
     break; // Two-byte opcode prefix
-  default: assert(0 && "Invalid prefix!");
+  default: llvm_unreachable("Invalid prefix!");
   case 0: break;  // No prefix!
   }
 
@@ -2981,6 +3021,9 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
   case X86II::TA:    // 0F 3A
     ++FinalSize;
     break;
+  case X86II::TF: // F2 0F 38
+    ++FinalSize;
+    break;
   }
 
   // If this is a two-address instruction, skip one of the register operands.
@@ -2993,7 +3036,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
     --NumOps;
 
   switch (Desc->TSFlags & X86II::FormMask) {
-  default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!");
+  default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
   case X86II::Pseudo:
     // Remember the current PC offset, this is the PIC relocation
     // base address.
@@ -3002,16 +3045,16 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
     break;
   case TargetInstrInfo::INLINEASM: {
     const MachineFunction *MF = MI.getParent()->getParent();
-    const char *AsmStr = MI.getOperand(0).getSymbolName();
-    const TargetAsmInfo* AI = MF->getTarget().getTargetAsmInfo();
-    FinalSize += AI->getInlineAsmLength(AsmStr);
+    const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+    FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
+                                        *MF->getTarget().getMCAsmInfo());
     break;
   }
   case TargetInstrInfo::DBG_LABEL:
   case TargetInstrInfo::EH_LABEL:
     break;
   case TargetInstrInfo::IMPLICIT_DEF:
-  case TargetInstrInfo::DECLARE:
+  case TargetInstrInfo::KILL:
   case X86::DWARF_LOC:
   case X86::FP_REG_KILL:
     break;
@@ -3038,7 +3081,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
       } else if (MO.isImm()) {
         FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
       } else {
-        assert(0 && "Unknown RawFrm operand!");
+        llvm_unreachable("Unknown RawFrm operand!");
      }
    }
    break;
@@ -3196,10 +3239,10 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
   }
 
   if (!Desc->isVariadic() && CurOp != NumOps) {
-    cerr << "Cannot determine size: ";
-    MI.dump();
-    cerr << '\n';
-    abort();
+    std::string msg;
+    raw_string_ostream Msg(msg);
+    Msg << "Cannot determine size: " << MI;
+    llvm_report_error(Msg.str());
   }
 
 
@@ -3209,7 +3252,7 @@ unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   const TargetInstrDesc &Desc = MI->getDesc();
-  bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_);
+  bool IsPIC = TM.getRelocationModel() == Reloc::PIC_;
   bool Is64BitMode = TM.getSubtargetImpl()->is64Bit();
   unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode);
   if (Desc.getOpcode() == X86::MOVPC32r)
@@ -3245,12 +3288,11 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
 
   // If we're using vanilla 'GOT' PIC style, we should use relative addressing
   // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
-  if (TM.getRelocationModel() == Reloc::PIC_ &&
-      TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) {
+  if (TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) {
     GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
     // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
     BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
-      .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", 0,
+      .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
                                     X86II::MO_GOT_ABSOLUTE_ADDRESS);
   } else {
     GlobalBaseReg = PC;
-- 
cgit v1.1