Diffstat (limited to 'contrib/llvm/lib/Target/X86/X86InstrInfo.cpp')
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrInfo.cpp  |  441
1 file changed, 390 insertions(+), 51 deletions(-)
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
index 3a02de0..307c96b 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===//
+//===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -25,14 +25,13 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/MC/MCAsmInfo.h"
#include <limits>
#define GET_INSTRINFO_CTOR
@@ -83,6 +82,12 @@ enum {
TB_FOLDED_STORE = 1 << 19
};
+struct X86OpTblEntry {
+ uint16_t RegOp;
+ uint16_t MemOp;
+ uint32_t Flags;
+};
+
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
? X86::ADJCALLSTACKDOWN64
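
The new row type above is not just cosmetic: two 16-bit opcode fields plus a 32-bit flag word pack into 8 bytes, where the old unsigned[3] rows took 12, and the tables below hold on the order of a thousand rows. A minimal standalone sketch of the layout (sizes assume the usual 32-bit unsigned; the program is illustrative, not part of the patch):

#include <cstdint>
#include <cstdio>

struct X86OpTblEntry {   // same shape as the struct added above
  uint16_t RegOp;        // register-form opcode
  uint16_t MemOp;        // memory-form opcode
  uint32_t Flags;        // packed TB_* properties
};

int main() {
  unsigned OldRow[3];    // the previous row type
  printf("old row: %zu bytes, new row: %zu bytes\n",
         sizeof(OldRow), sizeof(X86OpTblEntry));
  return 0;              // typically prints 12 vs. 8
}
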
@@ -92,7 +97,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: X86::ADJCALLSTACKUP32)),
TM(tm), RI(tm, *this) {
- static const unsigned OpTbl2Addr[][3] = {
+ static const X86OpTblEntry OpTbl2Addr[] = {
{ X86::ADC32ri, X86::ADC32mi, 0 },
{ X86::ADC32ri8, X86::ADC32mi8, 0 },
{ X86::ADC32rr, X86::ADC32mr, 0 },
@@ -260,22 +265,21 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
};
for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
- unsigned RegOp = OpTbl2Addr[i][0];
- unsigned MemOp = OpTbl2Addr[i][1];
- unsigned Flags = OpTbl2Addr[i][2];
+ unsigned RegOp = OpTbl2Addr[i].RegOp;
+ unsigned MemOp = OpTbl2Addr[i].MemOp;
+ unsigned Flags = OpTbl2Addr[i].Flags;
AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
RegOp, MemOp,
// Index 0, folded load and store, no alignment requirement.
Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
}
- static const unsigned OpTbl0[][3] = {
+ static const X86OpTblEntry OpTbl0[] = {
{ X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD },
{ X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD },
{ X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD },
{ X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD },
{ X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD },
- { X86::WINCALL64r, X86::WINCALL64m, TB_FOLDED_LOAD },
{ X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD },
{ X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD },
{ X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD },
@@ -352,6 +356,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::FsVMOVAPDrr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::FsVMOVAPSrr, X86::VMOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
@@ -362,6 +367,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE },
{ X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE },
// AVX 256-bit foldable instructions
+ { X86::VEXTRACTI128rr, X86::VEXTRACTI128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
@@ -370,14 +376,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
};
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
- unsigned RegOp = OpTbl0[i][0];
- unsigned MemOp = OpTbl0[i][1];
- unsigned Flags = OpTbl0[i][2];
+ unsigned RegOp = OpTbl0[i].RegOp;
+ unsigned MemOp = OpTbl0[i].MemOp;
+ unsigned Flags = OpTbl0[i].Flags;
AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
RegOp, MemOp, TB_INDEX_0 | Flags);
}
- static const unsigned OpTbl1[][3] = {
+ static const X86OpTblEntry OpTbl1[] = {
{ X86::CMP16rr, X86::CMP16rm, 0 },
{ X86::CMP32rr, X86::CMP32rm, 0 },
{ X86::CMP64rr, X86::CMP64rm, 0 },
@@ -456,6 +462,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOVZX64rr16, X86::MOVZX64rm16, 0 },
{ X86::MOVZX64rr32, X86::MOVZX64rm32, 0 },
{ X86::MOVZX64rr8, X86::MOVZX64rm8, 0 },
+ { X86::PABSBrr128, X86::PABSBrm128, TB_ALIGN_16 },
+ { X86::PABSDrr128, X86::PABSDrm128, TB_ALIGN_16 },
+ { X86::PABSWrr128, X86::PABSWrm128, TB_ALIGN_16 },
{ X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 },
{ X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 },
{ X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 },
@@ -508,6 +517,11 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 },
{ X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 },
{ X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 },
+ { X86::VPABSBrr128, X86::VPABSBrm128, TB_ALIGN_16 },
+ { X86::VPABSDrr128, X86::VPABSDrm128, TB_ALIGN_16 },
+ { X86::VPABSWrr128, X86::VPABSWrm128, TB_ALIGN_16 },
+ { X86::VPERMILPDri, X86::VPERMILPDmi, TB_ALIGN_16 },
+ { X86::VPERMILPSri, X86::VPERMILPSmi, TB_ALIGN_16 },
{ X86::VPSHUFDri, X86::VPSHUFDmi, TB_ALIGN_16 },
{ X86::VPSHUFHWri, X86::VPSHUFHWmi, TB_ALIGN_16 },
{ X86::VPSHUFLWri, X86::VPSHUFLWmi, TB_ALIGN_16 },
@@ -524,22 +538,39 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
// AVX 256-bit foldable instructions
{ X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
{ X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
- { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_16 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
{ X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
- { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }
+ { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
+ { X86::VPERMILPDYri, X86::VPERMILPDYmi, TB_ALIGN_32 },
+ { X86::VPERMILPSYri, X86::VPERMILPSYmi, TB_ALIGN_32 },
+ // AVX2 foldable instructions
+ { X86::VPABSBrr256, X86::VPABSBrm256, TB_ALIGN_32 },
+ { X86::VPABSDrr256, X86::VPABSDrm256, TB_ALIGN_32 },
+ { X86::VPABSWrr256, X86::VPABSWrm256, TB_ALIGN_32 },
+ { X86::VPSHUFDYri, X86::VPSHUFDYmi, TB_ALIGN_32 },
+ { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, TB_ALIGN_32 },
+ { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, TB_ALIGN_32 },
+ { X86::VRCPPSYr, X86::VRCPPSYm, TB_ALIGN_32 },
+ { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, TB_ALIGN_32 },
+ { X86::VRSQRTPSYr, X86::VRSQRTPSYm, TB_ALIGN_32 },
+ { X86::VRSQRTPSYr_Int, X86::VRSQRTPSYm_Int, TB_ALIGN_32 },
+ { X86::VSQRTPDYr, X86::VSQRTPDYm, TB_ALIGN_32 },
+ { X86::VSQRTPDYr_Int, X86::VSQRTPDYm_Int, TB_ALIGN_32 },
+ { X86::VSQRTPSYr, X86::VSQRTPSYm, TB_ALIGN_32 },
+ { X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, TB_ALIGN_32 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
- unsigned RegOp = OpTbl1[i][0];
- unsigned MemOp = OpTbl1[i][1];
- unsigned Flags = OpTbl1[i][2];
+ unsigned RegOp = OpTbl1[i].RegOp;
+ unsigned MemOp = OpTbl1[i].MemOp;
+ unsigned Flags = OpTbl1[i].Flags;
AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
RegOp, MemOp,
// Index 1, folded load
Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
}
- static const unsigned OpTbl2[][3] = {
+ static const X86OpTblEntry OpTbl2[] = {
{ X86::ADC32rr, X86::ADC32rm, 0 },
{ X86::ADC64rr, X86::ADC64rm, 0 },
{ X86::ADD16rr, X86::ADD16rm, 0 },
@@ -563,6 +594,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 },
{ X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 },
{ X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 },
+ { X86::BLENDPDrri, X86::BLENDPDrmi, TB_ALIGN_16 },
+ { X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16 },
+ { X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16 },
+ { X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16 },
{ X86::CMOVA16rr, X86::CMOVA16rm, 0 },
{ X86::CMOVA32rr, X86::CMOVA32rm, 0 },
{ X86::CMOVA64rr, X86::CMOVA64rm, 0 },
@@ -652,6 +687,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
{ X86::MINSSrr, X86::MINSSrm, 0 },
{ X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
+ { X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 },
{ X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
{ X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
{ X86::MULSDrr, X86::MULSDrm, 0 },
@@ -664,30 +700,45 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 },
{ X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 },
{ X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 },
+ { X86::PACKUSDWrr, X86::PACKUSDWrm, TB_ALIGN_16 },
{ X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 },
{ X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 },
{ X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 },
{ X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 },
{ X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 },
{ X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 },
+ { X86::PADDUSBrr, X86::PADDUSBrm, TB_ALIGN_16 },
+ { X86::PADDUSWrr, X86::PADDUSWrm, TB_ALIGN_16 },
{ X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 },
+ { X86::PALIGNR128rr, X86::PALIGNR128rm, TB_ALIGN_16 },
{ X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 },
{ X86::PANDrr, X86::PANDrm, TB_ALIGN_16 },
{ X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 },
{ X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 },
+ { X86::PBLENDWrri, X86::PBLENDWrmi, TB_ALIGN_16 },
{ X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 },
{ X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 },
+ { X86::PCMPEQQrr, X86::PCMPEQQrm, TB_ALIGN_16 },
{ X86::PCMPEQWrr, X86::PCMPEQWrm, TB_ALIGN_16 },
{ X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 },
{ X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 },
+ { X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 },
{ X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 },
+ { X86::PHADDDrr, X86::PHADDDrm, TB_ALIGN_16 },
+ { X86::PHADDWrr, X86::PHADDWrm, TB_ALIGN_16 },
+ { X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 },
+ { X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 },
+ { X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 },
+ { X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 },
{ X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 },
+ { X86::PMADDUBSWrr128, X86::PMADDUBSWrm128, TB_ALIGN_16 },
{ X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
{ X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 },
{ X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
{ X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
{ X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
{ X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
+ { X86::PMULHRSWrr128, X86::PMULHRSWrm128, TB_ALIGN_16 },
{ X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
{ X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 },
{ X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 },
@@ -695,6 +746,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 },
{ X86::PORrr, X86::PORrm, TB_ALIGN_16 },
{ X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 },
+ { X86::PSHUFBrr, X86::PSHUFBrm, TB_ALIGN_16 },
+ { X86::PSIGNBrr, X86::PSIGNBrm, TB_ALIGN_16 },
+ { X86::PSIGNWrr, X86::PSIGNWrm, TB_ALIGN_16 },
+ { X86::PSIGNDrr, X86::PSIGNDrm, TB_ALIGN_16 },
{ X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 },
{ X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 },
{ X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 },
@@ -778,6 +833,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VANDNPSrr, X86::VANDNPSrm, TB_ALIGN_16 },
{ X86::VANDPDrr, X86::VANDPDrm, TB_ALIGN_16 },
{ X86::VANDPSrr, X86::VANDPSrm, TB_ALIGN_16 },
+ { X86::VBLENDPDrri, X86::VBLENDPDrmi, TB_ALIGN_16 },
+ { X86::VBLENDPSrri, X86::VBLENDPSrmi, TB_ALIGN_16 },
+ { X86::VBLENDVPDrr, X86::VBLENDVPDrm, TB_ALIGN_16 },
+ { X86::VBLENDVPSrr, X86::VBLENDVPSrm, TB_ALIGN_16 },
{ X86::VCMPPDrri, X86::VCMPPDrmi, TB_ALIGN_16 },
{ X86::VCMPPSrri, X86::VCMPPSrmi, TB_ALIGN_16 },
{ X86::VCMPSDrr, X86::VCMPSDrm, 0 },
@@ -816,6 +875,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 },
{ X86::VMINSSrr, X86::VMINSSrm, 0 },
{ X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 },
+ { X86::VMPSADBWrri, X86::VMPSADBWrmi, TB_ALIGN_16 },
{ X86::VMULPDrr, X86::VMULPDrm, TB_ALIGN_16 },
{ X86::VMULPSrr, X86::VMULPSrm, TB_ALIGN_16 },
{ X86::VMULSDrr, X86::VMULSDrm, 0 },
@@ -824,28 +884,47 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VORPSrr, X86::VORPSrm, TB_ALIGN_16 },
{ X86::VPACKSSDWrr, X86::VPACKSSDWrm, TB_ALIGN_16 },
{ X86::VPACKSSWBrr, X86::VPACKSSWBrm, TB_ALIGN_16 },
+ { X86::VPACKUSDWrr, X86::VPACKUSDWrm, TB_ALIGN_16 },
{ X86::VPACKUSWBrr, X86::VPACKUSWBrm, TB_ALIGN_16 },
{ X86::VPADDBrr, X86::VPADDBrm, TB_ALIGN_16 },
{ X86::VPADDDrr, X86::VPADDDrm, TB_ALIGN_16 },
{ X86::VPADDQrr, X86::VPADDQrm, TB_ALIGN_16 },
{ X86::VPADDSBrr, X86::VPADDSBrm, TB_ALIGN_16 },
{ X86::VPADDSWrr, X86::VPADDSWrm, TB_ALIGN_16 },
+ { X86::VPADDUSBrr, X86::VPADDUSBrm, TB_ALIGN_16 },
+ { X86::VPADDUSWrr, X86::VPADDUSWrm, TB_ALIGN_16 },
{ X86::VPADDWrr, X86::VPADDWrm, TB_ALIGN_16 },
+ { X86::VPALIGNR128rr, X86::VPALIGNR128rm, TB_ALIGN_16 },
{ X86::VPANDNrr, X86::VPANDNrm, TB_ALIGN_16 },
{ X86::VPANDrr, X86::VPANDrm, TB_ALIGN_16 },
+ { X86::VPAVGBrr, X86::VPAVGBrm, TB_ALIGN_16 },
+ { X86::VPAVGWrr, X86::VPAVGWrm, TB_ALIGN_16 },
+ { X86::VPBLENDWrri, X86::VPBLENDWrmi, TB_ALIGN_16 },
{ X86::VPCMPEQBrr, X86::VPCMPEQBrm, TB_ALIGN_16 },
{ X86::VPCMPEQDrr, X86::VPCMPEQDrm, TB_ALIGN_16 },
+ { X86::VPCMPEQQrr, X86::VPCMPEQQrm, TB_ALIGN_16 },
{ X86::VPCMPEQWrr, X86::VPCMPEQWrm, TB_ALIGN_16 },
{ X86::VPCMPGTBrr, X86::VPCMPGTBrm, TB_ALIGN_16 },
{ X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 },
+ { X86::VPCMPGTQrr, X86::VPCMPGTQrm, TB_ALIGN_16 },
{ X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 },
+ { X86::VPHADDDrr, X86::VPHADDDrm, TB_ALIGN_16 },
+ { X86::VPHADDSWrr128, X86::VPHADDSWrm128, TB_ALIGN_16 },
+ { X86::VPHADDWrr, X86::VPHADDWrm, TB_ALIGN_16 },
+ { X86::VPHSUBDrr, X86::VPHSUBDrm, TB_ALIGN_16 },
+ { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, TB_ALIGN_16 },
+ { X86::VPHSUBWrr, X86::VPHSUBWrm, TB_ALIGN_16 },
+ { X86::VPERMILPDrr, X86::VPERMILPDrm, TB_ALIGN_16 },
+ { X86::VPERMILPSrr, X86::VPERMILPSrm, TB_ALIGN_16 },
{ X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 },
+ { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, TB_ALIGN_16 },
{ X86::VPMADDWDrr, X86::VPMADDWDrm, TB_ALIGN_16 },
{ X86::VPMAXSWrr, X86::VPMAXSWrm, TB_ALIGN_16 },
{ X86::VPMAXUBrr, X86::VPMAXUBrm, TB_ALIGN_16 },
{ X86::VPMINSWrr, X86::VPMINSWrm, TB_ALIGN_16 },
{ X86::VPMINUBrr, X86::VPMINUBrm, TB_ALIGN_16 },
{ X86::VPMULDQrr, X86::VPMULDQrm, TB_ALIGN_16 },
+ { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, TB_ALIGN_16 },
{ X86::VPMULHUWrr, X86::VPMULHUWrm, TB_ALIGN_16 },
{ X86::VPMULHWrr, X86::VPMULHWrm, TB_ALIGN_16 },
{ X86::VPMULLDrr, X86::VPMULLDrm, TB_ALIGN_16 },
@@ -853,6 +932,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 },
{ X86::VPORrr, X86::VPORrm, TB_ALIGN_16 },
{ X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 },
+ { X86::VPSHUFBrr, X86::VPSHUFBrm, TB_ALIGN_16 },
+ { X86::VPSIGNBrr, X86::VPSIGNBrm, TB_ALIGN_16 },
+ { X86::VPSIGNWrr, X86::VPSIGNWrm, TB_ALIGN_16 },
+ { X86::VPSIGNDrr, X86::VPSIGNDrm, TB_ALIGN_16 },
{ X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 },
{ X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 },
{ X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 },
@@ -886,14 +969,154 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, TB_ALIGN_16 },
{ X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, TB_ALIGN_16 },
{ X86::VXORPDrr, X86::VXORPDrm, TB_ALIGN_16 },
- { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 }
+ { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 },
+ // AVX 256-bit foldable instructions
+ { X86::VADDPDYrr, X86::VADDPDYrm, TB_ALIGN_32 },
+ { X86::VADDPSYrr, X86::VADDPSYrm, TB_ALIGN_32 },
+ { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, TB_ALIGN_32 },
+ { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, TB_ALIGN_32 },
+ { X86::VANDNPDYrr, X86::VANDNPDYrm, TB_ALIGN_32 },
+ { X86::VANDNPSYrr, X86::VANDNPSYrm, TB_ALIGN_32 },
+ { X86::VANDPDYrr, X86::VANDPDYrm, TB_ALIGN_32 },
+ { X86::VANDPSYrr, X86::VANDPSYrm, TB_ALIGN_32 },
+ { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, TB_ALIGN_32 },
+ { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, TB_ALIGN_32 },
+ { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, TB_ALIGN_32 },
+ { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, TB_ALIGN_32 },
+ { X86::VCMPPDYrri, X86::VCMPPDYrmi, TB_ALIGN_32 },
+ { X86::VCMPPSYrri, X86::VCMPPSYrmi, TB_ALIGN_32 },
+ { X86::VDIVPDYrr, X86::VDIVPDYrm, TB_ALIGN_32 },
+ { X86::VDIVPSYrr, X86::VDIVPSYrm, TB_ALIGN_32 },
+ { X86::VHADDPDYrr, X86::VHADDPDYrm, TB_ALIGN_32 },
+ { X86::VHADDPSYrr, X86::VHADDPSYrm, TB_ALIGN_32 },
+ { X86::VHSUBPDYrr, X86::VHSUBPDYrm, TB_ALIGN_32 },
+ { X86::VHSUBPSYrr, X86::VHSUBPSYrm, TB_ALIGN_32 },
+ { X86::VINSERTF128rr, X86::VINSERTF128rm, TB_ALIGN_32 },
+ { X86::VMAXPDYrr, X86::VMAXPDYrm, TB_ALIGN_32 },
+ { X86::VMAXPDYrr_Int, X86::VMAXPDYrm_Int, TB_ALIGN_32 },
+ { X86::VMAXPSYrr, X86::VMAXPSYrm, TB_ALIGN_32 },
+ { X86::VMAXPSYrr_Int, X86::VMAXPSYrm_Int, TB_ALIGN_32 },
+ { X86::VMINPDYrr, X86::VMINPDYrm, TB_ALIGN_32 },
+ { X86::VMINPDYrr_Int, X86::VMINPDYrm_Int, TB_ALIGN_32 },
+ { X86::VMINPSYrr, X86::VMINPSYrm, TB_ALIGN_32 },
+ { X86::VMINPSYrr_Int, X86::VMINPSYrm_Int, TB_ALIGN_32 },
+ { X86::VMULPDYrr, X86::VMULPDYrm, TB_ALIGN_32 },
+ { X86::VMULPSYrr, X86::VMULPSYrm, TB_ALIGN_32 },
+ { X86::VORPDYrr, X86::VORPDYrm, TB_ALIGN_32 },
+ { X86::VORPSYrr, X86::VORPSYrm, TB_ALIGN_32 },
+ { X86::VPERM2F128rr, X86::VPERM2F128rm, TB_ALIGN_32 },
+ { X86::VPERMILPDYrr, X86::VPERMILPDYrm, TB_ALIGN_32 },
+ { X86::VPERMILPSYrr, X86::VPERMILPSYrm, TB_ALIGN_32 },
+ { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, TB_ALIGN_32 },
+ { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, TB_ALIGN_32 },
+ { X86::VSUBPDYrr, X86::VSUBPDYrm, TB_ALIGN_32 },
+ { X86::VSUBPSYrr, X86::VSUBPSYrm, TB_ALIGN_32 },
+ { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, TB_ALIGN_32 },
+ { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, TB_ALIGN_32 },
+ { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, TB_ALIGN_32 },
+ { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, TB_ALIGN_32 },
+ { X86::VXORPDYrr, X86::VXORPDYrm, TB_ALIGN_32 },
+ { X86::VXORPSYrr, X86::VXORPSYrm, TB_ALIGN_32 },
+ // AVX2 foldable instructions
+ { X86::VINSERTI128rr, X86::VINSERTI128rm, TB_ALIGN_16 },
+ { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, TB_ALIGN_32 },
+ { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, TB_ALIGN_32 },
+ { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, TB_ALIGN_32 },
+ { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, TB_ALIGN_32 },
+ { X86::VPADDBYrr, X86::VPADDBYrm, TB_ALIGN_32 },
+ { X86::VPADDDYrr, X86::VPADDDYrm, TB_ALIGN_32 },
+ { X86::VPADDQYrr, X86::VPADDQYrm, TB_ALIGN_32 },
+ { X86::VPADDSBYrr, X86::VPADDSBYrm, TB_ALIGN_32 },
+ { X86::VPADDSWYrr, X86::VPADDSWYrm, TB_ALIGN_32 },
+ { X86::VPADDUSBYrr, X86::VPADDUSBYrm, TB_ALIGN_32 },
+ { X86::VPADDUSWYrr, X86::VPADDUSWYrm, TB_ALIGN_32 },
+ { X86::VPADDWYrr, X86::VPADDWYrm, TB_ALIGN_32 },
+ { X86::VPALIGNR256rr, X86::VPALIGNR256rm, TB_ALIGN_32 },
+ { X86::VPANDNYrr, X86::VPANDNYrm, TB_ALIGN_32 },
+ { X86::VPANDYrr, X86::VPANDYrm, TB_ALIGN_32 },
+ { X86::VPAVGBYrr, X86::VPAVGBYrm, TB_ALIGN_32 },
+ { X86::VPAVGWYrr, X86::VPAVGWYrm, TB_ALIGN_32 },
+ { X86::VPBLENDDrri, X86::VPBLENDDrmi, TB_ALIGN_32 },
+ { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, TB_ALIGN_32 },
+ { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, TB_ALIGN_32 },
+ { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, TB_ALIGN_32 },
+ { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, TB_ALIGN_32 },
+ { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, TB_ALIGN_32 },
+ { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, TB_ALIGN_32 },
+ { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, TB_ALIGN_32 },
+ { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, TB_ALIGN_32 },
+ { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, TB_ALIGN_32 },
+ { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, TB_ALIGN_32 },
+ { X86::VPERM2I128rr, X86::VPERM2I128rm, TB_ALIGN_32 },
+ { X86::VPERMDYrr, X86::VPERMDYrm, TB_ALIGN_32 },
+ { X86::VPERMPDYrr, X86::VPERMPDYrm, TB_ALIGN_32 },
+ { X86::VPERMPSYrr, X86::VPERMPSYrm, TB_ALIGN_32 },
+ { X86::VPERMQYrr, X86::VPERMQYrm, TB_ALIGN_32 },
+ { X86::VPHADDDYrr, X86::VPHADDDYrm, TB_ALIGN_32 },
+ { X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_32 },
+ { X86::VPHADDWYrr, X86::VPHADDWYrm, TB_ALIGN_32 },
+ { X86::VPHSUBDYrr, X86::VPHSUBDYrm, TB_ALIGN_32 },
+ { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, TB_ALIGN_32 },
+ { X86::VPHSUBWYrr, X86::VPHSUBWYrm, TB_ALIGN_32 },
+ { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, TB_ALIGN_32 },
+ { X86::VPMADDWDYrr, X86::VPMADDWDYrm, TB_ALIGN_32 },
+ { X86::VPMAXSWYrr, X86::VPMAXSWYrm, TB_ALIGN_32 },
+ { X86::VPMAXUBYrr, X86::VPMAXUBYrm, TB_ALIGN_32 },
+ { X86::VPMINSWYrr, X86::VPMINSWYrm, TB_ALIGN_32 },
+ { X86::VPMINUBYrr, X86::VPMINUBYrm, TB_ALIGN_32 },
+ { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, TB_ALIGN_32 },
+ { X86::VPMULDQYrr, X86::VPMULDQYrm, TB_ALIGN_32 },
+ { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, TB_ALIGN_32 },
+ { X86::VPMULHUWYrr, X86::VPMULHUWYrm, TB_ALIGN_32 },
+ { X86::VPMULHWYrr, X86::VPMULHWYrm, TB_ALIGN_32 },
+ { X86::VPMULLDYrr, X86::VPMULLDYrm, TB_ALIGN_32 },
+ { X86::VPMULLWYrr, X86::VPMULLWYrm, TB_ALIGN_32 },
+ { X86::VPMULUDQYrr, X86::VPMULUDQYrm, TB_ALIGN_32 },
+ { X86::VPORYrr, X86::VPORYrm, TB_ALIGN_32 },
+ { X86::VPSADBWYrr, X86::VPSADBWYrm, TB_ALIGN_32 },
+ { X86::VPSHUFBYrr, X86::VPSHUFBYrm, TB_ALIGN_32 },
+ { X86::VPSIGNBYrr, X86::VPSIGNBYrm, TB_ALIGN_32 },
+ { X86::VPSIGNWYrr, X86::VPSIGNWYrm, TB_ALIGN_32 },
+ { X86::VPSIGNDYrr, X86::VPSIGNDYrm, TB_ALIGN_32 },
+ { X86::VPSLLDYrr, X86::VPSLLDYrm, TB_ALIGN_16 },
+ { X86::VPSLLQYrr, X86::VPSLLQYrm, TB_ALIGN_16 },
+ { X86::VPSLLWYrr, X86::VPSLLWYrm, TB_ALIGN_16 },
+ { X86::VPSLLVDrr, X86::VPSLLVDrm, TB_ALIGN_16 },
+ { X86::VPSLLVDYrr, X86::VPSLLVDYrm, TB_ALIGN_32 },
+ { X86::VPSLLVQrr, X86::VPSLLVQrm, TB_ALIGN_16 },
+ { X86::VPSLLVQYrr, X86::VPSLLVQYrm, TB_ALIGN_32 },
+ { X86::VPSRADYrr, X86::VPSRADYrm, TB_ALIGN_16 },
+ { X86::VPSRAWYrr, X86::VPSRAWYrm, TB_ALIGN_16 },
+ { X86::VPSRAVDrr, X86::VPSRAVDrm, TB_ALIGN_16 },
+ { X86::VPSRAVDYrr, X86::VPSRAVDYrm, TB_ALIGN_32 },
+ { X86::VPSRLDYrr, X86::VPSRLDYrm, TB_ALIGN_16 },
+ { X86::VPSRLQYrr, X86::VPSRLQYrm, TB_ALIGN_16 },
+ { X86::VPSRLWYrr, X86::VPSRLWYrm, TB_ALIGN_16 },
+ { X86::VPSRLVDrr, X86::VPSRLVDrm, TB_ALIGN_16 },
+ { X86::VPSRLVDYrr, X86::VPSRLVDYrm, TB_ALIGN_32 },
+ { X86::VPSRLVQrr, X86::VPSRLVQrm, TB_ALIGN_16 },
+ { X86::VPSRLVQYrr, X86::VPSRLVQYrm, TB_ALIGN_32 },
+ { X86::VPSUBBYrr, X86::VPSUBBYrm, TB_ALIGN_32 },
+ { X86::VPSUBDYrr, X86::VPSUBDYrm, TB_ALIGN_32 },
+ { X86::VPSUBSBYrr, X86::VPSUBSBYrm, TB_ALIGN_32 },
+ { X86::VPSUBSWYrr, X86::VPSUBSWYrm, TB_ALIGN_32 },
+ { X86::VPSUBWYrr, X86::VPSUBWYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, TB_ALIGN_16 },
+ { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_32 },
+ { X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_32 },
// FIXME: add AVX 256-bit foldable instructions
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
- unsigned RegOp = OpTbl2[i][0];
- unsigned MemOp = OpTbl2[i][1];
- unsigned Flags = OpTbl2[i][2];
+ unsigned RegOp = OpTbl2[i].RegOp;
+ unsigned MemOp = OpTbl2[i].MemOp;
+ unsigned Flags = OpTbl2[i].Flags;
AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
RegOp, MemOp,
// Index 2, folded load
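
All four registration loops feed the same pair of structures: RegOp2MemOpTableN answers "which memory form folds a load or store into operand N of this opcode", and MemOp2RegOpTable answers the unfolding question in reverse. A toy model of a lookup (names and flag layout are stand-ins, not the file's real encoding):

#include <cstdint>
#include <cstdio>
#include <map>

// Toy model of one fold table (hypothetical values chosen only so the
// sketch runs): register-form opcode -> memory-form opcode, plus which
// operand folds and what alignment the folded load requires.
struct FoldEntry {
  uint16_t MemOp;
  uint8_t  FoldedOpIndex;
  uint8_t  MinAlign;
};

int main() {
  std::map<uint16_t, FoldEntry> RegOp2MemOpTable2;
  RegOp2MemOpTable2[0x100 /*stand-in for ADDPSrr*/] =
      {0x101 /*stand-in for ADDPSrm*/, 2, 16};
  const FoldEntry &E = RegOp2MemOpTable2.at(0x100);
  printf("fold operand %u with a %u-byte aligned load\n",
         E.FoldedOpIndex, E.MinAlign);
  return 0;
}
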
@@ -946,7 +1169,6 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
switch (MI.getOpcode()) {
default:
llvm_unreachable(0);
- break;
case X86::MOVSX16rr8:
case X86::MOVZX16rr8:
case X86::MOVSX32rr8:
@@ -989,7 +1211,8 @@ bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
static bool isFrameLoadOpcode(int Opcode) {
switch (Opcode) {
- default: break;
+ default:
+ return false;
case X86::MOV8rm:
case X86::MOV16rm:
case X86::MOV32rm:
@@ -1011,9 +1234,7 @@ static bool isFrameLoadOpcode(int Opcode) {
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
return true;
- break;
}
- return false;
}
static bool isFrameStoreOpcode(int Opcode) {
@@ -1203,6 +1424,8 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
bool SeenDef = false;
for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
MachineOperand &MO = Iter->getOperand(j);
+ if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
+ SeenDef = true;
if (!MO.isReg())
continue;
if (MO.getReg() == X86::EFLAGS) {
@@ -1247,6 +1470,10 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
bool SawKill = false;
for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
MachineOperand &MO = Iter->getOperand(j);
+ // A register mask may clobber EFLAGS, but we should still look for a
+ // live EFLAGS def.
+ if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
+ SawKill = true;
if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
if (MO.isDef()) return MO.isDead();
if (MO.isKill()) SawKill = true;
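
Both regmask checks close the same hole: a call instruction carries a register-mask operand rather than explicit defs, so a scan that only inspects register operands never sees the implicit EFLAGS clobber. A self-contained model of the mask convention (mirroring what MachineOperand::clobbersPhysReg tests; the surrounding names are hypothetical):

#include <cstdint>

// In LLVM's convention a regmask lists *preserved* registers: a set
// bit means the callee preserves the register, a clear bit means it
// is clobbered.
static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg) {
  return !(RegMask[PhysReg / 32] & (1u << (PhysReg % 32)));
}

int main() {
  uint32_t Mask[4] = {0};          // pessimistic mask: clobbers everything
  Mask[0] |= 1u << 3;              // pretend register #3 is callee-saved
  return clobbersPhysReg(Mask, 3); // 0: register 3 survives the call
}
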
@@ -1357,7 +1584,6 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
switch (MIOpc) {
default:
llvm_unreachable(0);
- break;
case X86::SHL16ri: {
unsigned ShAmt = MI->getOperand(2).getImm();
MIB.addReg(0).addImm(1 << ShAmt)
@@ -1392,9 +1618,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
// Build and insert into an implicit UNDEF value. This is OK because
// we'll be shifting and then extracting the lower 16-bits.
- BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+ BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF),leaInReg2);
InsMI2 =
- BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
.addReg(leaInReg2, RegState::Define, X86::sub_16bit)
.addReg(Src2, getKillRegState(isKill2));
addRegReg(MIB, leaInReg, true, leaInReg2, true);
@@ -1469,6 +1695,24 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
.addReg(B, getKillRegState(isKill)).addImm(M);
break;
}
+ case X86::SHUFPDrri: {
+ assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!");
+ if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
+
+ unsigned B = MI->getOperand(1).getReg();
+ unsigned C = MI->getOperand(2).getReg();
+ if (B != C) return 0;
+ unsigned A = MI->getOperand(0).getReg();
+ unsigned M = MI->getOperand(3).getImm();
+
+ // Convert to PSHUFD mask.
+ M = ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6)| 0x44;
+
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
+ .addReg(A, RegState::Define | getDeadRegState(isDead))
+ .addReg(B, getKillRegState(isKill)).addImm(M);
+ break;
+ }
case X86::SHL64ri: {
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
// NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
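
The immediate remapping in the SHUFPDrri case deserves a sanity check: with both inputs equal, each 64-bit selector bit of the shufpd immediate must expand into a consecutive pair of 32-bit pshufd selectors (2k, 2k+1). A standalone program exercising the same formula:

#include <cstdio>

// The conversion copied from the case above: bits 0 and 1 of the
// shufpd immediate pick 64-bit elements; pshufd needs four 2-bit
// selectors of 32-bit elements, so element k becomes the pair 2k, 2k+1.
static unsigned convertMask(unsigned M) {
  return ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6)
         | 0x44;
}

int main() {
  for (unsigned M = 0; M < 4; ++M) {
    unsigned P = convertMask(M);
    printf("shufpd imm %u -> pshufd imm 0x%02X = dwords %u,%u,%u,%u\n",
           M, P, P & 3, (P >> 2) & 3, (P >> 4) & 3, (P >> 6) & 3);
  }
  return 0;  // e.g. imm 1 yields 0x4E: dwords 2,3,0,1, the qwords swapped
}
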
@@ -1597,7 +1841,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD32rr_DB: {
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc;
- TargetRegisterClass *RC;
+ const TargetRegisterClass *RC;
if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) {
Opc = X86::LEA64r;
RC = X86::GR64_NOSPRegisterClass;
@@ -1904,13 +2148,12 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
}
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isTerminator()) return false;
+ if (!MI->isTerminator()) return false;
// Conditional branch is a special case.
- if (MCID.isBranch() && !MCID.isBarrier())
+ if (MI->isBranch() && !MI->isBarrier())
return true;
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return true;
return !isPredicated(MI);
}
@@ -1936,7 +2179,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// A terminator that isn't a branch can't easily be handled by this
// analysis.
- if (!I->getDesc().isBranch())
+ if (!I->isBranch())
return true;
// Handle unconditional branches.
@@ -2420,7 +2663,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
switch (MI->getOpcode()) {
case X86::V_SET0:
- return Expand2AddrUndef(MI, get(HasAVX ? X86::VPXORrr : X86::PXORrr));
+ case X86::FsFLD0SS:
+ case X86::FsFLD0SD:
+ return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
return true;
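
Routing FsFLD0SS and FsFLD0SD through the same expansion works because xorps-of-self zeroes the whole register, and the all-zero bit pattern is IEEE +0.0 in both single and double precision. A tiny illustration of that identity (plain C++, not compiler code):

#include <cstdio>
#include <cstring>

int main() {
  // xorps %xmm0, %xmm0 leaves every lane all-zero; reinterpreting the
  // zero bit pattern as a float gives +0.0, so one zero idiom serves
  // V_SET0, FsFLD0SS and FsFLD0SD alike.
  unsigned bits = 0u ^ 0u;
  float f;
  std::memcpy(&f, &bits, sizeof f);
  printf("%f\n", f);  // prints 0.000000
  return 0;
}
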
@@ -2624,6 +2869,10 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
///
static bool hasPartialRegUpdate(unsigned Opcode) {
switch (Opcode) {
+ case X86::CVTSI2SSrr:
+ case X86::CVTSI2SS64rr:
+ case X86::CVTSI2SDrr:
+ case X86::CVTSI2SD64rr:
case X86::CVTSD2SSrr:
case X86::Int_CVTSD2SSrr:
case X86::CVTSS2SDrr:
@@ -2631,7 +2880,9 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
case X86::RCPSSr:
case X86::RCPSSr_Int:
case X86::ROUNDSDr:
+ case X86::ROUNDSDr_Int:
case X86::ROUNDSSr:
+ case X86::ROUNDSSr_Int:
case X86::RSQRTSSr:
case X86::RSQRTSSr_Int:
case X86::SQRTSSr:
@@ -2643,7 +2894,9 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
case X86::Int_VCVTSS2SDrr:
case X86::VRCPSSr:
case X86::VROUNDSDr:
+ case X86::VROUNDSDr_Int:
case X86::VROUNDSSr:
+ case X86::VROUNDSSr_Int:
case X86::VRSQRTSSr:
case X86::VSQRTSSr:
return true;
@@ -2652,6 +2905,54 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
return false;
}
+/// getPartialRegUpdateClearance - Inform the ExeDepsFix pass how many idle
+/// instructions we would like before a partial register update.
+unsigned X86InstrInfo::
+getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ if (OpNum != 0 || !hasPartialRegUpdate(MI->getOpcode()))
+ return 0;
+
+ // If MI is marked as reading Reg, the partial register update is wanted.
+ const MachineOperand &MO = MI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (MO.readsReg() || MI->readsVirtualRegister(Reg))
+ return 0;
+ } else {
+ if (MI->readsRegister(Reg, TRI))
+ return 0;
+ }
+
+ // If any of the preceding 16 instructions are reading Reg, insert a
+ // dependency breaking instruction. The magic number is based on a few
+ // Nehalem experiments.
+ return 16;
+}
+
+void X86InstrInfo::
+breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ if (X86::VR128RegClass.contains(Reg)) {
+ // These instructions are all floating point domain, so xorps is the best
+ // choice.
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr;
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg)
+ .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
+ } else if (X86::VR256RegClass.contains(Reg)) {
+ // Use vxorps to clear the full ymm register.
+ // It wants to read and write the xmm sub-register.
+ unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm);
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(X86::VXORPSrr), XReg)
+ .addReg(XReg, RegState::Undef).addReg(XReg, RegState::Undef)
+ .addReg(Reg, RegState::ImplicitDefine);
+ } else
+ return;
+ MI->addRegisterKilled(Reg, TRI, true);
+}
+
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
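
The pair of hooks added above implements a simple policy: a partial-register writer that does not actually want the merge gets a dependency-breaking xor unless its destination has been idle long enough. A toy model of the decision ExeDepsFix ends up making (the 16 is the patch's Nehalem-derived constant; everything else is a stand-in):

// Toy model of the clearance policy, not LLVM code.
constexpr unsigned PartialUpdateClearance = 16;  // Nehalem-tuned magic

// partialUpdate: the opcode writes only part of its destination
// readsDest:     the merge with the old value is intended
// idleInstrs:    instructions since the destination was last touched
static bool wantsDependencyBreak(bool partialUpdate, bool readsDest,
                                 unsigned idleInstrs) {
  if (!partialUpdate || readsDest)
    return false;                  // the partial update is deliberate
  return idleInstrs < PartialUpdateClearance;
}

int main() {
  // e.g. cvtsi2ss writing an xmm register last read 10 instructions ago:
  return wantsDependencyBreak(true, false, 10) ? 0 : 1;
}
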
@@ -2714,6 +3015,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
switch (LoadMI->getOpcode()) {
case X86::AVX_SET0PSY:
case X86::AVX_SET0PDY:
+ case X86::AVX2_SETALLONES:
+ case X86::AVX2_SET0:
Alignment = 32;
break;
case X86::V_SET0:
@@ -2722,11 +3025,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = 16;
break;
case X86::FsFLD0SD:
- case X86::VFsFLD0SD:
Alignment = 8;
break;
case X86::FsFLD0SS:
- case X86::VFsFLD0SS:
Alignment = 4;
break;
default:
@@ -2759,10 +3060,10 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
case X86::AVX_SET0PSY:
case X86::AVX_SET0PDY:
case X86::AVX_SETALLONES:
+ case X86::AVX2_SETALLONES:
+ case X86::AVX2_SET0:
case X86::FsFLD0SD:
- case X86::FsFLD0SS:
- case X86::VFsFLD0SD:
- case X86::VFsFLD0SS: {
+ case X86::FsFLD0SS: {
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
@@ -2788,16 +3089,19 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineConstantPool &MCP = *MF.getConstantPool();
Type *Ty;
unsigned Opc = LoadMI->getOpcode();
- if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS)
+ if (Opc == X86::FsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
- else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD)
+ else if (Opc == X86::FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
+ else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX2_SET0)
+ Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8);
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
- bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES);
+ bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES ||
+ Opc == X86::AVX2_SETALLONES);
const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
Constant::getNullValue(Ty);
unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
@@ -3329,7 +3633,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
// These are the replaceable SSE instructions. Some of these have Int variants
// that we don't include here. We don't want to replace instructions selected
// by intrinsics.
-static const unsigned ReplaceableInstrs[][3] = {
+static const uint16_t ReplaceableInstrs[][3] = {
//PackedSingle PackedDouble PackedInt
{ X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
{ X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
@@ -3366,31 +3670,66 @@ static const unsigned ReplaceableInstrs[][3] = {
{ X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
{ X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
{ X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
- { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr },
+ { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }
+};
+
+static const uint16_t ReplaceableInstrsAVX2[][3] = {
+ //PackedSingle PackedDouble PackedInt
+ { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm },
+ { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr },
+ { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDYrm },
+ { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDYrr },
+ { X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm },
+ { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
+ { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
+ { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
+ { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
+ { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
+ { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
+ { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
+ { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
+ { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr }
};
// FIXME: Some shuffle and unpack instructions have equivalents in different
// domains, but they require a bit more work than just switching opcodes.
-static const unsigned *lookup(unsigned opcode, unsigned domain) {
+static const uint16_t *lookup(unsigned opcode, unsigned domain) {
for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
if (ReplaceableInstrs[i][domain-1] == opcode)
return ReplaceableInstrs[i];
return 0;
}
+static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
+ for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i)
+ if (ReplaceableInstrsAVX2[i][domain-1] == opcode)
+ return ReplaceableInstrsAVX2[i];
+ return 0;
+}
+
std::pair<uint16_t, uint16_t>
X86InstrInfo::getExecutionDomain(const MachineInstr *MI) const {
uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
- return std::make_pair(domain,
- domain && lookup(MI->getOpcode(), domain) ? 0xe : 0);
+ bool hasAVX2 = TM.getSubtarget<X86Subtarget>().hasAVX2();
+ uint16_t validDomains = 0;
+ if (domain && lookup(MI->getOpcode(), domain))
+ validDomains = 0xe;
+ else if (domain && lookupAVX2(MI->getOpcode(), domain))
+ validDomains = hasAVX2 ? 0xe : 0x6;
+ return std::make_pair(domain, validDomains);
}
void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
assert(Domain>0 && Domain<4 && "Invalid execution domain");
uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
assert(dom && "Not an SSE instruction");
- const unsigned *table = lookup(MI->getOpcode(), dom);
+ const uint16_t *table = lookup(MI->getOpcode(), dom);
+ if (!table) { // try the other table
+ assert((TM.getSubtarget<X86Subtarget>().hasAVX2() || Domain < 3) &&
+ "256-bit vector operations only available in AVX2");
+ table = lookupAVX2(MI->getOpcode(), dom);
+ }
assert(table && "Cannot change domain");
MI->setDesc(get(table[Domain-1]));
}
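
The two-table scheme reads as follows: each replaceable opcode row holds its PackedSingle, PackedDouble, and PackedInt spellings, and the returned bitmask marks which of domains 1-3 are legal rewrite targets, so 0xe offers all three while 0x6 withholds the integer domain when AVX2 is missing. A small standalone decode of those masks:

#include <cstdio>

int main() {
  // Decode the validDomains masks returned above: domain 1 is
  // PackedSingle, 2 PackedDouble, 3 PackedInt; bit i set means the
  // instruction may be rewritten into domain i.
  const unsigned masks[] = {0xe, 0x6};
  for (unsigned m : masks) {
    printf("mask 0x%x allows domains:", m);
    for (unsigned dom = 1; dom <= 3; ++dom)
      if ((m >> dom) & 1)
        printf(" %u", dom);
    printf("\n");  // 0x6 omits domain 3: 256-bit integer ops need AVX2
  }
  return 0;
}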