diff options
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/AsmParser/X86AsmParser.cpp | 6 | ||||
-rw-r--r-- | lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 13 | ||||
-rw-r--r-- | lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 16 | ||||
-rw-r--r-- | lib/Target/X86/X86COFF.h | 95 | ||||
-rw-r--r-- | lib/Target/X86/X86FastISel.cpp | 7 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 74 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 1 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFormats.td | 3 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.h | 8 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 116 | ||||
-rw-r--r-- | lib/Target/X86/X86MCCodeEmitter.cpp | 3 |
11 files changed, 176 insertions, 166 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index a856e9c..f1e66ab 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -65,7 +65,7 @@ public: X86ATTAsmParser(const Target &T, MCAsmParser &_Parser) : TargetAsmParser(T), Parser(_Parser) {} - virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc, + virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); virtual bool ParseDirective(AsmToken DirectiveID); @@ -602,7 +602,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { } bool X86ATTAsmParser:: -ParseInstruction(const StringRef &Name, SMLoc NameLoc, +ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // The various flavors of pushf and popf use Requires<In32BitMode> and // Requires<In64BitMode>, but the assembler doesn't yet implement that. @@ -612,6 +612,8 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, return Error(NameLoc, "popfl cannot be encoded in 64-bit mode"); else if (Name == "pushfl") return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode"); + else if (Name == "pusha") + return Error(NameLoc, "pusha cannot be encoded in 64-bit mode"); } else { if (Name == "popfq") return Error(NameLoc, "popfq cannot be encoded in 32-bit mode"); diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index 73bc603..08e6486 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -17,7 +17,6 @@ #include "X86IntelInstPrinter.h" #include "X86MCInstLower.h" #include "X86.h" -#include "X86COFF.h" #include "X86COFFMachineModuleInfo.h" #include "X86MachineFunctionInfo.h" #include "X86TargetMachine.h" @@ -35,6 +34,7 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetOptions.h" @@ -60,8 +60,10 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { if (Subtarget->isTargetCOFF()) { bool Intrn = MF.getFunction()->hasInternalLinkage(); OutStreamer.BeginCOFFSymbolDef(CurrentFnSym); - OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::C_STAT : COFF::C_EXT); - OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT); + OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC + : COFF::IMAGE_SYM_CLASS_EXTERNAL); + OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + << COFF::SCT_COMPLEX_TYPE_SHIFT); OutStreamer.EndCOFFSymbolDef(); } @@ -582,8 +584,9 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { E = COFFMMI.externals_end(); I != E; ++I) { OutStreamer.BeginCOFFSymbolDef(CurrentFnSym); - OutStreamer.EmitCOFFSymbolStorageClass(COFF::C_EXT); - OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT); + OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL); + OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + << COFF::SCT_COMPLEX_TYPE_SHIFT); OutStreamer.EndCOFFSymbolDef(); } diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index 09f150b..e67fc06 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -154,15 +154,13 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break; case X86II::MO_TLVP_PIC_BASE: - Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); - // Subtract the pic base. - Expr - = MCBinaryExpr::CreateSub(Expr, - MCSymbolRefExpr::Create(GetPICBaseSymbol(), - Ctx), - Ctx); - - break; + Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); + // Subtract the pic base. + Expr = MCBinaryExpr::CreateSub(Expr, + MCSymbolRefExpr::Create(GetPICBaseSymbol(), + Ctx), + Ctx); + break; case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break; case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break; case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break; diff --git a/lib/Target/X86/X86COFF.h b/lib/Target/X86/X86COFF.h deleted file mode 100644 index 0a8e4e6..0000000 --- a/lib/Target/X86/X86COFF.h +++ /dev/null @@ -1,95 +0,0 @@ -//===--- X86COFF.h - Some definitions from COFF documentations ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file just defines some symbols found in COFF documentation. They are -// used to emit function type information for COFF targets (Cygwin/Mingw32). -// -//===----------------------------------------------------------------------===// - -#ifndef X86COFF_H -#define X86COFF_H - -namespace COFF -{ -/// Storage class tells where and what the symbol represents -enum StorageClass { - C_EFCN = -1, ///< Physical end of function - C_NULL = 0, ///< No symbol - C_AUTO = 1, ///< External definition - C_EXT = 2, ///< External symbol - C_STAT = 3, ///< Static - C_REG = 4, ///< Register variable - C_EXTDEF = 5, ///< External definition - C_LABEL = 6, ///< Label - C_ULABEL = 7, ///< Undefined label - C_MOS = 8, ///< Member of structure - C_ARG = 9, ///< Function argument - C_STRTAG = 10, ///< Structure tag - C_MOU = 11, ///< Member of union - C_UNTAG = 12, ///< Union tag - C_TPDEF = 13, ///< Type definition - C_USTATIC = 14, ///< Undefined static - C_ENTAG = 15, ///< Enumeration tag - C_MOE = 16, ///< Member of enumeration - C_REGPARM = 17, ///< Register parameter - C_FIELD = 18, ///< Bit field - - C_BLOCK = 100, ///< ".bb" or ".eb" - beginning or end of block - C_FCN = 101, ///< ".bf" or ".ef" - beginning or end of function - C_EOS = 102, ///< End of structure - C_FILE = 103, ///< File name - C_LINE = 104, ///< Line number, reformatted as symbol - C_ALIAS = 105, ///< Duplicate tag - C_HIDDEN = 106 ///< External symbol in dmert public lib -}; - -/// The type of the symbol. This is made up of a base type and a derived type. -/// For example, pointer to int is "pointer to T" and "int" -enum SymbolType { - T_NULL = 0, ///< No type info - T_ARG = 1, ///< Void function argument (only used by compiler) - T_VOID = 1, ///< The same as above. Just named differently in some specs. - T_CHAR = 2, ///< Character - T_SHORT = 3, ///< Short integer - T_INT = 4, ///< Integer - T_LONG = 5, ///< Long integer - T_FLOAT = 6, ///< Floating point - T_DOUBLE = 7, ///< Double word - T_STRUCT = 8, ///< Structure - T_UNION = 9, ///< Union - T_ENUM = 10, ///< Enumeration - T_MOE = 11, ///< Member of enumeration - T_UCHAR = 12, ///< Unsigned character - T_USHORT = 13, ///< Unsigned short - T_UINT = 14, ///< Unsigned integer - T_ULONG = 15 ///< Unsigned long -}; - -/// Derived type of symbol -enum SymbolDerivedType { - DT_NON = 0, ///< No derived type - DT_PTR = 1, ///< Pointer to T - DT_FCN = 2, ///< Function returning T - DT_ARY = 3 ///< Array of T -}; - -/// Masks for extracting parts of type -enum SymbolTypeMasks { - N_BTMASK = 017, ///< Mask for base type - N_TMASK = 060 ///< Mask for derived type -}; - -/// Offsets of parts of type -enum Shifts { - N_BTSHFT = 4 ///< Type is formed as (base + derived << N_BTSHIFT) -}; - -} - -#endif // X86COFF_H diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index cdde24a..ce13707 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -540,7 +540,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { StubAM.GVOpFlags = GVFlags; // Prepare for inserting code in the local-value area. - MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea(); + SavePoint SaveInsertPt = enterLocalValueArea(); if (TLI.getPointerTy() == MVT::i64) { Opc = X86::MOV64rm; @@ -1279,12 +1279,11 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) { return false; // First issue a copy to GR16_ABCD or GR32_ABCD. - unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr; const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass; unsigned CopyReg = createResultReg(CopyRC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CopyOpc), CopyReg) - .addReg(InputReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + CopyReg).addReg(InputReg); // Then issue an extract_subreg. unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1a63474..b3c4886 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2458,17 +2458,23 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // If the tailcall address may be in a register, then make sure it's // possible to register allocate for it. In 32-bit, the call address can // only target EAX, EDX, or ECX since the tail call must be scheduled after - // callee-saved registers are restored. In 64-bit, it's RAX, RCX, RDX, RSI, - // RDI, R8, R9, R11. - if (!isa<GlobalAddressSDNode>(Callee) && + // callee-saved registers are restored. These happen to be the same + // registers used to pass 'inreg' arguments so watch out for those. + if (!Subtarget->is64Bit() && + !isa<GlobalAddressSDNode>(Callee) && !isa<ExternalSymbolSDNode>(Callee)) { - unsigned Limit = Subtarget->is64Bit() ? 8 : 3; unsigned NumInRegs = 0; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - if (VA.isRegLoc()) { - if (++NumInRegs == Limit) + if (!VA.isRegLoc()) + continue; + unsigned Reg = VA.getLocReg(); + switch (Reg) { + default: break; + case X86::EAX: case X86::EDX: case X86::ECX: + if (++NumInRegs == 3) return false; + break; } } } @@ -7993,7 +7999,6 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, unsigned immOpc, unsigned LoadOpc, unsigned CXchgOpc, - unsigned copyOpc, unsigned notOpc, unsigned EAXreg, TargetRegisterClass *RC, @@ -8070,7 +8075,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, MIB.addReg(tt); (*MIB).addOperand(*argOpers[valArgIndx]); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), EAXreg); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg); MIB.addReg(t1); MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc)); @@ -8081,7 +8086,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, (*MIB).setMemRefs(bInstr->memoperands_begin(), bInstr->memoperands_end()); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), destOper.getReg()); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg()); MIB.addReg(EAXreg); // insert branch @@ -8117,7 +8122,6 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, const TargetRegisterClass *RC = X86::GR32RegisterClass; const unsigned LoadOpc = X86::MOV32rm; - const unsigned copyOpc = X86::MOV32rr; const unsigned NotOpc = X86::NOT32r; const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); const BasicBlock *LLVM_BB = MBB->getBasicBlock(); @@ -8227,14 +8231,14 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, MIB.addReg(t2); (*MIB).addOperand(*argOpers[valArgIndx + 1]); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX); MIB.addReg(t1); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EDX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX); MIB.addReg(t2); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EBX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX); MIB.addReg(t5); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::ECX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX); MIB.addReg(t6); MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B)); @@ -8245,9 +8249,9 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, (*MIB).setMemRefs(bInstr->memoperands_begin(), bInstr->memoperands_end()); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t3); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3); MIB.addReg(X86::EAX); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t4); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4); MIB.addReg(X86::EDX); // insert branch @@ -8326,12 +8330,12 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); if (argOpers[valArgIndx]->isReg()) - MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2); else MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2); (*MIB).addOperand(*argOpers[valArgIndx]); - MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), X86::EAX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX); MIB.addReg(t1); MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr)); @@ -8353,7 +8357,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, (*MIB).setMemRefs(mInstr->memoperands_begin(), mInstr->memoperands_end()); - MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), destOper.getReg()); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg()); MIB.addReg(X86::EAX); // insert branch @@ -8735,25 +8739,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::ATOMAND32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, X86::AND32ri, X86::MOV32rm, - X86::LCMPXCHG32, X86::MOV32rr, + X86::LCMPXCHG32, X86::NOT32r, X86::EAX, X86::GR32RegisterClass); case X86::ATOMOR32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr, X86::OR32ri, X86::MOV32rm, - X86::LCMPXCHG32, X86::MOV32rr, + X86::LCMPXCHG32, X86::NOT32r, X86::EAX, X86::GR32RegisterClass); case X86::ATOMXOR32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr, X86::XOR32ri, X86::MOV32rm, - X86::LCMPXCHG32, X86::MOV32rr, + X86::LCMPXCHG32, X86::NOT32r, X86::EAX, X86::GR32RegisterClass); case X86::ATOMNAND32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, X86::AND32ri, X86::MOV32rm, - X86::LCMPXCHG32, X86::MOV32rr, + X86::LCMPXCHG32, X86::NOT32r, X86::EAX, X86::GR32RegisterClass, true); case X86::ATOMMIN32: @@ -8768,25 +8772,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::ATOMAND16: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, X86::AND16ri, X86::MOV16rm, - X86::LCMPXCHG16, X86::MOV16rr, + X86::LCMPXCHG16, X86::NOT16r, X86::AX, X86::GR16RegisterClass); case X86::ATOMOR16: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr, X86::OR16ri, X86::MOV16rm, - X86::LCMPXCHG16, X86::MOV16rr, + X86::LCMPXCHG16, X86::NOT16r, X86::AX, X86::GR16RegisterClass); case X86::ATOMXOR16: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr, X86::XOR16ri, X86::MOV16rm, - X86::LCMPXCHG16, X86::MOV16rr, + X86::LCMPXCHG16, X86::NOT16r, X86::AX, X86::GR16RegisterClass); case X86::ATOMNAND16: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, X86::AND16ri, X86::MOV16rm, - X86::LCMPXCHG16, X86::MOV16rr, + X86::LCMPXCHG16, X86::NOT16r, X86::AX, X86::GR16RegisterClass, true); case X86::ATOMMIN16: @@ -8801,25 +8805,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::ATOMAND8: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, X86::AND8ri, X86::MOV8rm, - X86::LCMPXCHG8, X86::MOV8rr, + X86::LCMPXCHG8, X86::NOT8r, X86::AL, X86::GR8RegisterClass); case X86::ATOMOR8: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr, X86::OR8ri, X86::MOV8rm, - X86::LCMPXCHG8, X86::MOV8rr, + X86::LCMPXCHG8, X86::NOT8r, X86::AL, X86::GR8RegisterClass); case X86::ATOMXOR8: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr, X86::XOR8ri, X86::MOV8rm, - X86::LCMPXCHG8, X86::MOV8rr, + X86::LCMPXCHG8, X86::NOT8r, X86::AL, X86::GR8RegisterClass); case X86::ATOMNAND8: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, X86::AND8ri, X86::MOV8rm, - X86::LCMPXCHG8, X86::MOV8rr, + X86::LCMPXCHG8, X86::NOT8r, X86::AL, X86::GR8RegisterClass, true); // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way. @@ -8827,25 +8831,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::ATOMAND64: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, X86::AND64ri32, X86::MOV64rm, - X86::LCMPXCHG64, X86::MOV64rr, + X86::LCMPXCHG64, X86::NOT64r, X86::RAX, X86::GR64RegisterClass); case X86::ATOMOR64: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr, X86::OR64ri32, X86::MOV64rm, - X86::LCMPXCHG64, X86::MOV64rr, + X86::LCMPXCHG64, X86::NOT64r, X86::RAX, X86::GR64RegisterClass); case X86::ATOMXOR64: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr, X86::XOR64ri32, X86::MOV64rm, - X86::LCMPXCHG64, X86::MOV64rr, + X86::LCMPXCHG64, X86::NOT64r, X86::RAX, X86::GR64RegisterClass); case X86::ATOMNAND64: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, X86::AND64ri32, X86::MOV64rm, - X86::LCMPXCHG64, X86::MOV64rr, + X86::LCMPXCHG64, X86::NOT64r, X86::RAX, X86::GR64RegisterClass, true); case X86::ATOMMIN64: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 2d28e5c..4e4daa4 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -764,7 +764,6 @@ namespace llvm { unsigned immOpc, unsigned loadOpc, unsigned cxchgOpc, - unsigned copyOpc, unsigned notOpc, unsigned EAXreg, TargetRegisterClass *RC, diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 97578af..cc3fdf1 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -106,6 +106,7 @@ class VEX { bit hasVEXPrefix = 1; } class VEX_W { bit hasVEX_WPrefix = 1; } class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; } class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; } +class VEX_L { bit hasVEX_L = 1; } class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, string AsmStr, Domain d = GenericDomain> @@ -138,6 +139,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, bit hasVEX_4VPrefix = 0; // Does this inst requires the VEX.VVVV field? bit hasVEX_i8ImmReg = 0; // Does this inst requires the last source register // to be encoded in a immediate field? + bit hasVEX_L = 0; // Does this inst uses large (256-bit) registers? // TSFlags layout should be kept in sync with X86InstrInfo.h. let TSFlags{5-0} = FormBits; @@ -155,6 +157,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, let TSFlags{33} = hasVEX_WPrefix; let TSFlags{34} = hasVEX_4VPrefix; let TSFlags{35} = hasVEX_i8ImmReg; + let TSFlags{36} = hasVEX_L; } class I<bits<8> o, Format f, dag outs, dag ins, string asm, diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index f762b58..ad0217a 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -453,7 +453,13 @@ namespace X86II { // VEX_I8IMM - Specifies that the last register used in a AVX instruction, // must be encoded in the i8 immediate field. This usually happens in // instructions with 4 operands. - VEX_I8IMM = 1ULL << 35 + VEX_I8IMM = 1ULL << 35, + + // VEX_L - Stands for a bit in the VEX opcode prefix meaning the current + // instruction uses 256-bit wide registers. This is usually auto detected if + // a VR256 register is used, but some AVX instructions also have this field + // marked when using a f256 memory references. + VEX_L = 1ULL << 36 }; // getBaseOpcodeFor - This function returns the "base" X86 opcode for the diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ab0005b..ebe161b 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -666,6 +666,9 @@ defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load, defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load, "cvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle>, TB, VEX; +defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, f256mem, load, + "cvtdq2ps\t{$src, $dst|$dst, $src}", + SSEPackedSingle>, TB, VEX; } let Pattern = []<dag> in { defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/, @@ -806,9 +809,13 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), // Convert packed single/double fp to doubleword let isAsmParserOnly = 1 in { def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; + "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; + "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; } def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", []>; @@ -862,6 +869,10 @@ def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; } def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>; @@ -912,14 +923,39 @@ def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memop addr:$src)))]>; +let isAsmParserOnly = 1 in { +// The assembler can recognize rr 256-bit instructions by seeing a ymm +// register, but the same isn't true when using memory operands instead. +// Provide other assembly rr and rm forms to address this explicitly. +def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; + +// XMM only +def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; + +// YMM only +def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), + "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; +} + // Convert packed single to packed double -let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + // SSE2 instructions without OpSize prefix def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX, - Requires<[HasAVX]>; + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX, - Requires<[HasAVX]>; + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; } def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB; @@ -949,10 +985,25 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), // Convert packed double to packed single let isAsmParserOnly = 1 in { +// The assembler can recognize rr 256-bit instructions by seeing a ymm +// register, but the same isn't true when using memory operands instead. +// Provide other assembly rr and rm forms to address this explicitly. def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX; -// FIXME: the memory form of this instruction should described using -// use extra asm syntax + "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX; + +// XMM only +def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX; + +// YMM only +def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), + "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; } def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", []>; @@ -1142,6 +1193,16 @@ let isAsmParserOnly = 1 in { "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", SSEPackedDouble>, OpSize, VEX_4V; + let Pattern = []<dag> in { + defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_sse_cmp_ps, + "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", + SSEPackedSingle>, VEX_4V; + defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_sse2_cmp_pd, + "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; + } } let Constraints = "$src1 = $dst" in { defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, @@ -2935,19 +2996,46 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // SSE3 - Conversion Instructions //===---------------------------------------------------------------------===// +// Convert Packed Double FP to Packed DW Integers let isAsmParserOnly = 1, Predicates = [HasAVX] in { +// The assembler can recognize rr 256-bit instructions by seeing a ymm +// register, but the same isn't true when using memory operands instead. +// Provide other assembly rr and rm forms to address this explicitly. def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2DQXrYr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; + +// XMM only +def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; + +// YMM only +def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), + "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; } def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; + +// Convert Packed DW Integers to Packed Double FP +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDYrm : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +} + def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", []>; def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 633ddd4..23b0666 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -432,6 +432,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, if (TSFlags & X86II::VEX_W) VEX_W = 1; + if (TSFlags & X86II::VEX_L) + VEX_L = 1; + switch (TSFlags & X86II::Op0Mask) { default: assert(0 && "Invalid prefix!"); case X86II::T8: // 0F 38 |