Diffstat (limited to 'lib/Target/X86')
 lib/Target/X86/AsmParser/X86AsmParser.cpp    |   6
 lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp  |  13
 lib/Target/X86/AsmPrinter/X86MCInstLower.cpp |  16
 lib/Target/X86/X86COFF.h                     |  95
 lib/Target/X86/X86FastISel.cpp               |   7
 lib/Target/X86/X86ISelLowering.cpp           |  74
 lib/Target/X86/X86ISelLowering.h             |   1
 lib/Target/X86/X86InstrFormats.td            |   3
 lib/Target/X86/X86InstrInfo.h                |   8
 lib/Target/X86/X86InstrSSE.td                | 116
 lib/Target/X86/X86MCCodeEmitter.cpp          |   3
 11 files changed, 176 insertions(+), 166 deletions(-)
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index a856e9c..f1e66ab 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -65,7 +65,7 @@ public:
X86ATTAsmParser(const Target &T, MCAsmParser &_Parser)
: TargetAsmParser(T), Parser(_Parser) {}
- virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+ virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
virtual bool ParseDirective(AsmToken DirectiveID);
@@ -602,7 +602,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
}
bool X86ATTAsmParser::
-ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// The various flavors of pushf and popf use Requires<In32BitMode> and
// Requires<In64BitMode>, but the assembler doesn't yet implement that.
@@ -612,6 +612,8 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
else if (Name == "pushfl")
return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
+ else if (Name == "pusha")
+ return Error(NameLoc, "pusha cannot be encoded in 64-bit mode");
} else {
if (Name == "popfq")
return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
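Note: the following is a minimal standalone sketch (not the TargetAsmParser API) that condenses the mode checks this hunk adds: popfl, pushfl, and now pusha are rejected in 64-bit mode, and popfq in 32-bit mode.

#include <optional>
#include <string>

std::optional<std::string> checkModeRestriction(const std::string &Name,
                                                bool Is64BitMode) {
  if (Is64BitMode) {
    // 32-bit-only mnemonics, including the newly rejected "pusha".
    if (Name == "popfl" || Name == "pushfl" || Name == "pusha")
      return Name + " cannot be encoded in 64-bit mode";
  } else if (Name == "popfq") {
    return "popfq cannot be encoded in 32-bit mode";
  }
  return std::nullopt; // no mode restriction violated
}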
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index 73bc603..08e6486 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -17,7 +17,6 @@
#include "X86IntelInstPrinter.h"
#include "X86MCInstLower.h"
#include "X86.h"
-#include "X86COFF.h"
#include "X86COFFMachineModuleInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
@@ -35,6 +34,7 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/COFF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetOptions.h"
@@ -60,8 +60,10 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
if (Subtarget->isTargetCOFF()) {
bool Intrn = MF.getFunction()->hasInternalLinkage();
OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
- OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::C_STAT : COFF::C_EXT);
- OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT);
+ OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC
+ : COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
OutStreamer.EndCOFFSymbolDef();
}
@@ -582,8 +584,9 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
E = COFFMMI.externals_end();
I != E; ++I) {
OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
- OutStreamer.EmitCOFFSymbolStorageClass(COFF::C_EXT);
- OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT);
+ OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
OutStreamer.EndCOFFSymbolDef();
}
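Note: the two hunks above swap the private X86COFF.h constants for the equivalents in llvm/Support/COFF.h; the emitted symbol type is unchanged. Below is a quick standalone sanity check of the arithmetic, using the values from the deleted header later in this diff (DT_FCN = 2, N_BTSHFT = 4). It is a hypothetical illustration, not LLVM code.

#include <cassert>
#include <cstdint>

// Values per the COFF spec; they match DT_FCN and N_BTSHFT in the
// deleted X86COFF.h further down in this diff.
constexpr uint16_t IMAGE_SYM_DTYPE_FUNCTION = 2; // function returning T
constexpr uint16_t SCT_COMPLEX_TYPE_SHIFT   = 4; // complex type starts at bit 4

int main() {
  // What EmitCOFFSymbolType receives before and after this patch: 2 << 4.
  assert((IMAGE_SYM_DTYPE_FUNCTION << SCT_COMPLEX_TYPE_SHIFT) == 0x20);
  return 0;
}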
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index 09f150b..e67fc06 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -154,15 +154,13 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break;
case X86II::MO_TLVP_PIC_BASE:
- Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
- // Subtract the pic base.
- Expr
- = MCBinaryExpr::CreateSub(Expr,
- MCSymbolRefExpr::Create(GetPICBaseSymbol(),
- Ctx),
- Ctx);
-
- break;
+ Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
+ // Subtract the pic base.
+ Expr = MCBinaryExpr::CreateSub(Expr,
+ MCSymbolRefExpr::Create(GetPICBaseSymbol(),
+ Ctx),
+ Ctx);
+ break;
case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break;
case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
diff --git a/lib/Target/X86/X86COFF.h b/lib/Target/X86/X86COFF.h
deleted file mode 100644
index 0a8e4e6..0000000
--- a/lib/Target/X86/X86COFF.h
+++ /dev/null
@@ -1,95 +0,0 @@
-//===--- X86COFF.h - Some definitions from COFF documentations ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file just defines some symbols found in COFF documentation. They are
-// used to emit function type information for COFF targets (Cygwin/Mingw32).
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86COFF_H
-#define X86COFF_H
-
-namespace COFF
-{
-/// Storage class tells where and what the symbol represents
-enum StorageClass {
- C_EFCN = -1, ///< Physical end of function
- C_NULL = 0, ///< No symbol
- C_AUTO = 1, ///< External definition
- C_EXT = 2, ///< External symbol
- C_STAT = 3, ///< Static
- C_REG = 4, ///< Register variable
- C_EXTDEF = 5, ///< External definition
- C_LABEL = 6, ///< Label
- C_ULABEL = 7, ///< Undefined label
- C_MOS = 8, ///< Member of structure
- C_ARG = 9, ///< Function argument
- C_STRTAG = 10, ///< Structure tag
- C_MOU = 11, ///< Member of union
- C_UNTAG = 12, ///< Union tag
- C_TPDEF = 13, ///< Type definition
- C_USTATIC = 14, ///< Undefined static
- C_ENTAG = 15, ///< Enumeration tag
- C_MOE = 16, ///< Member of enumeration
- C_REGPARM = 17, ///< Register parameter
- C_FIELD = 18, ///< Bit field
-
- C_BLOCK = 100, ///< ".bb" or ".eb" - beginning or end of block
- C_FCN = 101, ///< ".bf" or ".ef" - beginning or end of function
- C_EOS = 102, ///< End of structure
- C_FILE = 103, ///< File name
- C_LINE = 104, ///< Line number, reformatted as symbol
- C_ALIAS = 105, ///< Duplicate tag
- C_HIDDEN = 106 ///< External symbol in dmert public lib
-};
-
-/// The type of the symbol. This is made up of a base type and a derived type.
-/// For example, pointer to int is "pointer to T" and "int"
-enum SymbolType {
- T_NULL = 0, ///< No type info
- T_ARG = 1, ///< Void function argument (only used by compiler)
- T_VOID = 1, ///< The same as above. Just named differently in some specs.
- T_CHAR = 2, ///< Character
- T_SHORT = 3, ///< Short integer
- T_INT = 4, ///< Integer
- T_LONG = 5, ///< Long integer
- T_FLOAT = 6, ///< Floating point
- T_DOUBLE = 7, ///< Double word
- T_STRUCT = 8, ///< Structure
- T_UNION = 9, ///< Union
- T_ENUM = 10, ///< Enumeration
- T_MOE = 11, ///< Member of enumeration
- T_UCHAR = 12, ///< Unsigned character
- T_USHORT = 13, ///< Unsigned short
- T_UINT = 14, ///< Unsigned integer
- T_ULONG = 15 ///< Unsigned long
-};
-
-/// Derived type of symbol
-enum SymbolDerivedType {
- DT_NON = 0, ///< No derived type
- DT_PTR = 1, ///< Pointer to T
- DT_FCN = 2, ///< Function returning T
- DT_ARY = 3 ///< Array of T
-};
-
-/// Masks for extracting parts of type
-enum SymbolTypeMasks {
- N_BTMASK = 017, ///< Mask for base type
- N_TMASK = 060 ///< Mask for derived type
-};
-
-/// Offsets of parts of type
-enum Shifts {
- N_BTSHFT = 4 ///< Type is formed as (base + derived << N_BTSHIFT)
-};
-
-}
-
-#endif // X86COFF_H
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index cdde24a..ce13707 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -540,7 +540,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
StubAM.GVOpFlags = GVFlags;
// Prepare for inserting code in the local-value area.
- MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea();
+ SavePoint SaveInsertPt = enterLocalValueArea();
if (TLI.getPointerTy() == MVT::i64) {
Opc = X86::MOV64rm;
@@ -1279,12 +1279,11 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
return false;
// First issue a copy to GR16_ABCD or GR32_ABCD.
- unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr;
const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
unsigned CopyReg = createResultReg(CopyRC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CopyOpc), CopyReg)
- .addReg(InputReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CopyReg).addReg(InputReg);
// Then issue an extract_subreg.
unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1a63474..b3c4886 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2458,17 +2458,23 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// If the tailcall address may be in a register, then make sure it's
// possible to register allocate for it. In 32-bit, the call address can
// only target EAX, EDX, or ECX since the tail call must be scheduled after
- // callee-saved registers are restored. In 64-bit, it's RAX, RCX, RDX, RSI,
- // RDI, R8, R9, R11.
- if (!isa<GlobalAddressSDNode>(Callee) &&
+ // callee-saved registers are restored. These happen to be the same
+ // registers used to pass 'inreg' arguments so watch out for those.
+ if (!Subtarget->is64Bit() &&
+ !isa<GlobalAddressSDNode>(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) {
- unsigned Limit = Subtarget->is64Bit() ? 8 : 3;
unsigned NumInRegs = 0;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- if (VA.isRegLoc()) {
- if (++NumInRegs == Limit)
+ if (!VA.isRegLoc())
+ continue;
+ unsigned Reg = VA.getLocReg();
+ switch (Reg) {
+ default: break;
+ case X86::EAX: case X86::EDX: case X86::ECX:
+ if (++NumInRegs == 3)
return false;
+ break;
}
}
}
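Note: a simplified model of the rewritten eligibility rule follows, with hypothetical types and only the counting logic from the hunk. In 64-bit mode the check is now skipped entirely; in 32-bit mode an indirect tail call is rejected only when inreg arguments occupy all three of EAX, ECX, and EDX, rather than after any three register arguments.

#include <vector>

enum X86Reg { EAX, ECX, EDX, ESI, EDI, OTHER };

// Returns true if at least one of EAX/ECX/EDX is left to hold the
// call target after inreg arguments are assigned.
bool addressRegAvailable(const std::vector<X86Reg> &ArgRegs) {
  unsigned NumInRegs = 0;
  for (X86Reg R : ArgRegs) {
    switch (R) {
    case EAX: case ECX: case EDX:
      if (++NumInRegs == 3)
        return false; // all candidate registers taken by arguments
      break;
    default:
      break; // other registers don't compete with the call address
    }
  }
  return true;
}

For example, arguments in {EAX, ECX, ESI} leave EDX free, so the call stays eligible where the old Limit-based count would have rejected it.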
@@ -7993,7 +7999,6 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
unsigned immOpc,
unsigned LoadOpc,
unsigned CXchgOpc,
- unsigned copyOpc,
unsigned notOpc,
unsigned EAXreg,
TargetRegisterClass *RC,
@@ -8070,7 +8075,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
MIB.addReg(tt);
(*MIB).addOperand(*argOpers[valArgIndx]);
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), EAXreg);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
MIB.addReg(t1);
MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
@@ -8081,7 +8086,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
(*MIB).setMemRefs(bInstr->memoperands_begin(),
bInstr->memoperands_end());
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), destOper.getReg());
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
MIB.addReg(EAXreg);
// insert branch
@@ -8117,7 +8122,6 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
const TargetRegisterClass *RC = X86::GR32RegisterClass;
const unsigned LoadOpc = X86::MOV32rm;
- const unsigned copyOpc = X86::MOV32rr;
const unsigned NotOpc = X86::NOT32r;
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
@@ -8227,14 +8231,14 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
MIB.addReg(t2);
(*MIB).addOperand(*argOpers[valArgIndx + 1]);
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
MIB.addReg(t1);
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EDX);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);
MIB.addReg(t2);
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EBX);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);
MIB.addReg(t5);
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::ECX);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);
MIB.addReg(t6);
MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
@@ -8245,9 +8249,9 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
(*MIB).setMemRefs(bInstr->memoperands_begin(),
bInstr->memoperands_end());
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t3);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3);
MIB.addReg(X86::EAX);
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t4);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4);
MIB.addReg(X86::EDX);
// insert branch
@@ -8326,12 +8330,12 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
if (argOpers[valArgIndx]->isReg())
- MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2);
else
MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
(*MIB).addOperand(*argOpers[valArgIndx]);
- MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), X86::EAX);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
MIB.addReg(t1);
MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr));
@@ -8353,7 +8357,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
(*MIB).setMemRefs(mInstr->memoperands_begin(),
mInstr->memoperands_end());
- MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), destOper.getReg());
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
MIB.addReg(X86::EAX);
// insert branch
@@ -8735,25 +8739,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::ATOMAND32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
X86::AND32ri, X86::MOV32rm,
- X86::LCMPXCHG32, X86::MOV32rr,
+ X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
X86::GR32RegisterClass);
case X86::ATOMOR32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
X86::OR32ri, X86::MOV32rm,
- X86::LCMPXCHG32, X86::MOV32rr,
+ X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
X86::GR32RegisterClass);
case X86::ATOMXOR32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
X86::XOR32ri, X86::MOV32rm,
- X86::LCMPXCHG32, X86::MOV32rr,
+ X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
X86::GR32RegisterClass);
case X86::ATOMNAND32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
X86::AND32ri, X86::MOV32rm,
- X86::LCMPXCHG32, X86::MOV32rr,
+ X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
X86::GR32RegisterClass, true);
case X86::ATOMMIN32:
@@ -8768,25 +8772,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::ATOMAND16:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
X86::AND16ri, X86::MOV16rm,
- X86::LCMPXCHG16, X86::MOV16rr,
+ X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
X86::GR16RegisterClass);
case X86::ATOMOR16:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
X86::OR16ri, X86::MOV16rm,
- X86::LCMPXCHG16, X86::MOV16rr,
+ X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
X86::GR16RegisterClass);
case X86::ATOMXOR16:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
X86::XOR16ri, X86::MOV16rm,
- X86::LCMPXCHG16, X86::MOV16rr,
+ X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
X86::GR16RegisterClass);
case X86::ATOMNAND16:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
X86::AND16ri, X86::MOV16rm,
- X86::LCMPXCHG16, X86::MOV16rr,
+ X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
X86::GR16RegisterClass, true);
case X86::ATOMMIN16:
@@ -8801,25 +8805,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::ATOMAND8:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
X86::AND8ri, X86::MOV8rm,
- X86::LCMPXCHG8, X86::MOV8rr,
+ X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
X86::GR8RegisterClass);
case X86::ATOMOR8:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
X86::OR8ri, X86::MOV8rm,
- X86::LCMPXCHG8, X86::MOV8rr,
+ X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
X86::GR8RegisterClass);
case X86::ATOMXOR8:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
X86::XOR8ri, X86::MOV8rm,
- X86::LCMPXCHG8, X86::MOV8rr,
+ X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
X86::GR8RegisterClass);
case X86::ATOMNAND8:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
X86::AND8ri, X86::MOV8rm,
- X86::LCMPXCHG8, X86::MOV8rr,
+ X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
X86::GR8RegisterClass, true);
// FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
@@ -8827,25 +8831,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::ATOMAND64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
X86::AND64ri32, X86::MOV64rm,
- X86::LCMPXCHG64, X86::MOV64rr,
+ X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
X86::GR64RegisterClass);
case X86::ATOMOR64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
X86::OR64ri32, X86::MOV64rm,
- X86::LCMPXCHG64, X86::MOV64rr,
+ X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
X86::GR64RegisterClass);
case X86::ATOMXOR64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
X86::XOR64ri32, X86::MOV64rm,
- X86::LCMPXCHG64, X86::MOV64rr,
+ X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
X86::GR64RegisterClass);
case X86::ATOMNAND64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
X86::AND64ri32, X86::MOV64rm,
- X86::LCMPXCHG64, X86::MOV64rr,
+ X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
X86::GR64RegisterClass, true);
case X86::ATOMMIN64:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 2d28e5c..4e4daa4 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -764,7 +764,6 @@ namespace llvm {
unsigned immOpc,
unsigned loadOpc,
unsigned cxchgOpc,
- unsigned copyOpc,
unsigned notOpc,
unsigned EAXreg,
TargetRegisterClass *RC,
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 97578af..cc3fdf1 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -106,6 +106,7 @@ class VEX { bit hasVEXPrefix = 1; }
class VEX_W { bit hasVEX_WPrefix = 1; }
class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
+class VEX_L { bit hasVEX_L = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr, Domain d = GenericDomain>
@@ -138,6 +139,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasVEX_4VPrefix = 0; // Does this inst require the VEX.VVVV field?
bit hasVEX_i8ImmReg = 0; // Does this inst require the last source register
// to be encoded in an immediate field?
+ bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
// TSFlags layout should be kept in sync with X86InstrInfo.h.
let TSFlags{5-0} = FormBits;
@@ -155,6 +157,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{33} = hasVEX_WPrefix;
let TSFlags{34} = hasVEX_4VPrefix;
let TSFlags{35} = hasVEX_i8ImmReg;
+ let TSFlags{36} = hasVEX_L;
}
class I<bits<8> o, Format f, dag outs, dag ins, string asm,
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index f762b58..ad0217a 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -453,7 +453,13 @@ namespace X86II {
// VEX_I8IMM - Specifies that the last register used in an AVX instruction
// must be encoded in the i8 immediate field. This usually happens in
// instructions with 4 operands.
- VEX_I8IMM = 1ULL << 35
+ VEX_I8IMM = 1ULL << 35,
+
+ // VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
+ // instruction uses 256-bit wide registers. This is usually auto-detected if
+ // a VR256 register is used, but some AVX instructions also have this field
+ // marked when using an f256 memory reference.
+ VEX_L = 1ULL << 36
};
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
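Note: the new enumerator must stay in sync with the 'let TSFlags{36} = hasVEX_L' line added to X86InstrFormats.td above. A tiny standalone check of the bit layout (plain C++, mirroring only the two declarations shown here):

#include <cassert>
#include <cstdint>

constexpr uint64_t VEX_I8IMM = 1ULL << 35;
constexpr uint64_t VEX_L     = 1ULL << 36;

int main() {
  uint64_t TSFlags = VEX_L;          // an instruction class carrying VEX_L
  assert(TSFlags & VEX_L);           // the test EmitVEXOpcodePrefix performs
  assert((VEX_L & VEX_I8IMM) == 0);  // bit 36 doesn't collide with bit 35
  return 0;
}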
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index ab0005b..ebe161b 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -666,6 +666,9 @@ defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle>, TB, VEX;
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, f256mem, load,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB, VEX;
}
let Pattern = []<dag> in {
defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
@@ -806,9 +809,13 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
// Convert packed single/double fp to doubleword
let isAsmParserOnly = 1 in {
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
}
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", []>;
@@ -862,6 +869,10 @@ def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
}
def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>;
@@ -912,14 +923,39 @@ def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memop addr:$src)))]>;
+let isAsmParserOnly = 1 in {
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
+def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+}
+
// Convert packed single to packed double
-let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ // SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
- Requires<[HasAVX]>;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
- Requires<[HasAVX]>;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
}
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
@@ -949,10 +985,25 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
// Convert packed double to packed single
let isAsmParserOnly = 1 in {
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
-// FIXME: the memory form of this instruction should described using
-// use extra asm syntax
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
}
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
@@ -1142,6 +1193,16 @@ let isAsmParserOnly = 1 in {
"cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
"cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
SSEPackedDouble>, OpSize, VEX_4V;
+ let Pattern = []<dag> in {
+ defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_sse_cmp_ps,
+ "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_sse2_cmp_pd,
+ "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ }
}
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
@@ -2935,19 +2996,46 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// SSE3 - Conversion Instructions
//===---------------------------------------------------------------------===//
+// Convert Packed Double FP to Packed DW Integers
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQXrYr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
}
def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+
+// Convert Packed DW Integers to Packed Double FP
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDYrm : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 633ddd4..23b0666 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -432,6 +432,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if (TSFlags & X86II::VEX_W)
VEX_W = 1;
+ if (TSFlags & X86II::VEX_L)
+ VEX_L = 1;
+
switch (TSFlags & X86II::Op0Mask) {
default: assert(0 && "Invalid prefix!");
case X86II::T8: // 0F 38
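Note: for context, this is the field VEX_L ultimately feeds. In a three-byte VEX prefix the final byte packs W, the inverted vvvv register specifier, L, and the pp SIMD-prefix bits; the sketch below follows the AVX specification and is a hypothetical packing helper, not code from this patch.

#include <cstdint>

// Third byte of a 3-byte VEX prefix: W | ~vvvv | L | pp.
// The real emitter builds this up incrementally from TSFlags.
uint8_t packVEXByte3(uint8_t W, uint8_t VVVV, uint8_t L, uint8_t PP) {
  return uint8_t((W & 1) << 7) |
         uint8_t((~VVVV & 0xF) << 3) |  // vvvv is stored ones-complemented
         uint8_t((L & 1) << 2) |        // VEX_L: 1 selects 256-bit registers
         uint8_t(PP & 0x3);             // implied SIMD prefix (66/F3/F2)
}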