diff options
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/AsmParser/X86AsmParser.cpp | 13 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86Disassembler.cpp | 8 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 2 | ||||
-rw-r--r-- | lib/Target/X86/README.txt | 82 | ||||
-rw-r--r-- | lib/Target/X86/X86FastISel.cpp | 8 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 50 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 18 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFormats.td | 2 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.h | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 3 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSystem.td | 5 | ||||
-rw-r--r-- | lib/Target/X86/X86MCCodeEmitter.cpp | 8 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.cpp | 7 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.h | 2 |
15 files changed, 154 insertions, 62 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 1cac07a..8fe549b 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -775,6 +775,19 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, delete &Op; } } + // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al". + if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") && + Operands.size() == 3) { + X86Operand &Op = *(X86Operand*)Operands.begin()[1]; + if (Op.isMem() && Op.Mem.SegReg == 0 && + isa<MCConstantExpr>(Op.Mem.Disp) && + cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && + Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { + SMLoc Loc = Op.getEndLoc(); + Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); + delete &Op; + } + } // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to // "shift <op>". diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 691e2d7..f777756 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -168,16 +168,16 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, switch (insn.displacementSize) { default: break; - case 8: + case 1: type = TYPE_MOFFS8; break; - case 16: + case 2: type = TYPE_MOFFS16; break; - case 32: + case 4: type = TYPE_MOFFS32; break; - case 64: + case 8: type = TYPE_MOFFS64; break; } diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 4f4fbcd..d0dc8b5 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -399,7 +399,7 @@ struct InternalInstruction { /* The segment override type */ SegmentOverride segmentOverride; - /* Sizes of various critical pieces of data */ + /* Sizes of various critical pieces of data, in bytes */ uint8_t registerSize; uint8_t addressSize; uint8_t displacementSize; diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index c10e170..abd1515 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1879,39 +1879,71 @@ _add32carry: //===---------------------------------------------------------------------===// -This: -char t(char c) { - return c/3; +The hot loop of 256.bzip2 contains code that looks a bit like this: + +int foo(char *P, char *Q, int x, int y) { + if (P[0] != Q[0]) + return P[0] < Q[0]; + if (P[1] != Q[1]) + return P[1] < Q[1]; + if (P[2] != Q[2]) + return P[2] < Q[2]; + return P[3] < Q[3]; } -Compiles to: $clang t.c -S -o - -O3 -mkernel -fomit-frame-pointer +In the real code, we get a lot more wrong than this. However, even in this +code we generate: -_t: ## @t - movslq %edi, %rax - imulq $-1431655765, %rax, %rcx ## imm = 0xFFFFFFFFAAAAAAAB - shrq $32, %rcx - addl %ecx, %eax - movl %eax, %ecx - shrl $31, %ecx - shrl %eax - addl %ecx, %eax - movsbl %al, %eax +_foo: ## @foo +## BB#0: ## %entry + movb (%rsi), %al + movb (%rdi), %cl + cmpb %al, %cl + je LBB0_2 +LBB0_1: ## %if.then + cmpb %al, %cl + jmp LBB0_5 +LBB0_2: ## %if.end + movb 1(%rsi), %al + movb 1(%rdi), %cl + cmpb %al, %cl + jne LBB0_1 +## BB#3: ## %if.end38 + movb 2(%rsi), %al + movb 2(%rdi), %cl + cmpb %al, %cl + jne LBB0_1 +## BB#4: ## %if.end60 + movb 3(%rdi), %al + cmpb 3(%rsi), %al +LBB0_5: ## %if.end60 + setl %al + movzbl %al, %eax ret -GCC gets: +Note that we generate jumps to LBB0_1 which does a redundant compare. The +redundant compare also forces the register values to be live, which prevents +folding one of the loads into the compare. In contrast, GCC 4.2 produces: -_t: - movl $86, %eax - imulb %dil - shrw $8, %ax - sarb $7, %dil - subb %dil, %al - movsbl %al,%eax +_foo: + movzbl (%rsi), %eax + cmpb %al, (%rdi) + jne L10 +L12: + movzbl 1(%rsi), %eax + cmpb %al, 1(%rdi) + jne L10 + movzbl 2(%rsi), %eax + cmpb %al, 2(%rdi) + jne L10 + movzbl 3(%rdi), %eax + cmpb 3(%rsi), %al +L10: + setl %al + movzbl %al, %eax ret -which is nicer. This also happens for int, not just char. +which is "perfect". //===---------------------------------------------------------------------===// - - diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 9d42ac2..6fa9284 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -597,9 +597,13 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { (AM.Base.Reg != 0 || AM.IndexReg != 0)) return false; - // Can't handle TLS or DLLImport. + // Can't handle DLLImport. + if (GV->hasDLLImportLinkage()) + return false; + + // Can't handle TLS. if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) - if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage()) + if (GVar->isThreadLocal()) return false; // Okay, we've committed to selecting this global. Set up the basic address. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 27024b4..2f49dbc 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -45,7 +45,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/VectorExtras.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" @@ -56,10 +55,6 @@ using namespace dwarf; STATISTIC(NumTailCalls, "Number of tail calls"); -static cl::opt<bool> -Disable256Bit("disable-256bit", cl::Hidden, - cl::desc("Disable use of 256-bit vectors")); - // Forward declarations. static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2); @@ -225,7 +220,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }; // X86 is weird, it always uses i8 for shift amounts and setcc results. - setShiftAmountType(MVT::i8); setBooleanContents(ZeroOrOneBooleanContent); setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(X86StackPtr); @@ -1713,7 +1707,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, else llvm_unreachable("Unknown argument type!"); - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); // If this is an 8 or 16-bit value, it is really passed promoted to 32 @@ -1845,7 +1839,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN, DAG.getIntPtrConstant(Offset)); unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs], - X86::GR64RegisterClass, dl); + X86::GR64RegisterClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, @@ -1861,7 +1855,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SmallVector<SDValue, 11> SaveXMMOps; SaveXMMOps.push_back(Chain); - unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass, dl); + unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass); SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8); SaveXMMOps.push_back(ALVal); @@ -1872,7 +1866,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) { unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs], - X86::VR128RegisterClass, dl); + X86::VR128RegisterClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32); SaveXMMOps.push_back(Val); } @@ -2693,6 +2687,10 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: + case X86ISD::VUNPCKLPS: + case X86ISD::VUNPCKLPD: + case X86ISD::VUNPCKLPSY: + case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: @@ -2760,6 +2758,10 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: + case X86ISD::VUNPCKLPS: + case X86ISD::VUNPCKLPD: + case X86ISD::VUNPCKLPSY: + case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: @@ -4178,7 +4180,8 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp); return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(Opc, dl, ShVT, SrcOp, - DAG.getConstant(NumBits, TLI.getShiftAmountTy()))); + DAG.getConstant(NumBits, + TLI.getShiftAmountTy(SrcOp.getValueType())))); } SDValue @@ -4327,16 +4330,15 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // For AVX-length vectors, build the individual 128-bit pieces and // use shuffles to put them in place. - if (VT.getSizeInBits() > 256 && - Subtarget->hasAVX() && - !Disable256Bit && + if (VT.getSizeInBits() > 256 && + Subtarget->hasAVX() && !ISD::isBuildVectorAllZeros(Op.getNode())) { SmallVector<SDValue, 8> V; V.resize(NumElems); for (unsigned i = 0; i < NumElems; ++i) { V[i] = Op.getOperand(i); } - + EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2); // Build the lower subvector. @@ -5044,7 +5046,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, DAG.getIntPtrConstant(Elt1 / 2)); if ((Elt1 & 1) == 0) InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt, - DAG.getConstant(8, TLI.getShiftAmountTy())); + DAG.getConstant(8, + TLI.getShiftAmountTy(InsElt.getValueType()))); else if (Elt0 >= 0) InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt, DAG.getConstant(0xFF00, MVT::i16)); @@ -5058,7 +5061,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, Elt0Src, DAG.getIntPtrConstant(Elt0 / 2)); if ((Elt0 & 1) != 0) InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0, - DAG.getConstant(8, TLI.getShiftAmountTy())); + DAG.getConstant(8, + TLI.getShiftAmountTy(InsElt0.getValueType()))); else if (Elt1 >= 0) InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0, DAG.getConstant(0x00FF, MVT::i16)); @@ -5475,7 +5479,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { // Both of them can't be memory operations though. if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2)) CanFoldLoad = false; - + if (CanFoldLoad) { if (HasSSE2 && NumElems == 2) return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG); @@ -6088,7 +6092,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDValue ScaledN2 = N2; if (Upper) ScaledN2 = DAG.getNode(ISD::SUB, dl, N2.getValueType(), N2, - DAG.getConstant(NumElems / + DAG.getConstant(NumElems / (VT.getSizeInBits() / 128), N2.getValueType())); Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubN0.getValueType(), SubN0, @@ -9327,6 +9331,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MOVSS: return "X86ISD::MOVSS"; case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS"; case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD"; + case X86ISD::VUNPCKLPS: return "X86ISD::VUNPCKLPS"; + case X86ISD::VUNPCKLPD: return "X86ISD::VUNPCKLPD"; + case X86ISD::VUNPCKLPSY: return "X86ISD::VUNPCKLPSY"; + case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY"; case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS"; case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD"; case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW"; @@ -11984,6 +11992,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: + case X86ISD::VUNPCKLPS: + case X86ISD::VUNPCKLPD: + case X86ISD::VUNPCKLPSY: + case X86ISD::VUNPCKLPDY: case X86ISD::MOVHLPS: case X86ISD::MOVLHPS: case X86ISD::PSHUFD: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 419da37..6ec4a7d 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -159,16 +159,16 @@ namespace llvm { /// PSHUFB - Shuffle 16 8-bit values within a vector. PSHUFB, - + /// PANDN - and with not'd value. PANDN, - + /// PSIGNB/W/D - Copy integer sign. - PSIGNB, PSIGNW, PSIGND, - + PSIGNB, PSIGNW, PSIGND, + /// PBLENDVB - Variable blend PBLENDVB, - + /// FMAX, FMIN - Floating point max and min. /// FMAX, FMIN, @@ -212,7 +212,7 @@ namespace llvm { // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results. ADD, SUB, ADC, SBB, SMUL, INC, DEC, OR, XOR, AND, - + UMUL, // LOW, HI, FLAGS = umul LHS, RHS // MUL_IMM - X86 specific multiply by immediate. @@ -248,6 +248,10 @@ namespace llvm { MOVSS, UNPCKLPS, UNPCKLPD, + VUNPCKLPS, + VUNPCKLPD, + VUNPCKLPSY, + VUNPCKLPDY, UNPCKHPS, UNPCKHPD, PUNPCKLBW, @@ -463,6 +467,8 @@ namespace llvm { virtual unsigned getJumpTableEncoding() const; + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; } + virtual const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 344c14c..0660072 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -41,6 +41,8 @@ def MRM_F8 : Format<41>; def MRM_F9 : Format<42>; def RawFrmImm8 : Format<43>; def RawFrmImm16 : Format<44>; +def MRM_D0 : Format<45>; +def MRM_D1 : Format<46>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index ceb1b65..76a9b12 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -369,8 +369,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, - { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, - { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 }, @@ -568,6 +566,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL16rr, X86::IMUL16rm, 0 }, { X86::IMUL32rr, X86::IMUL32rm, 0 }, { X86::IMUL64rr, X86::IMUL64rm, 0 }, + { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, + { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, { X86::MAXPDrr, X86::MAXPDrm, 16 }, { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 }, { X86::MAXPSrr, X86::MAXPSrm, 16 }, diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 1d44207..fcb5a25 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -311,6 +311,8 @@ namespace X86II { MRM_F0 = 40, MRM_F8 = 41, MRM_F9 = 42, + MRM_D0 = 45, + MRM_D1 = 46, /// RawFrmImm8 - This is used for the ENTER instruction, which has two /// immediates, the first of which is a 16-bit immediate (specified by @@ -577,6 +579,8 @@ namespace X86II { case X86II::MRM_F0: case X86II::MRM_F8: case X86II::MRM_F9: + case X86II::MRM_D0: + case X86II::MRM_D1: return -1; } } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 87dc4be..f832a7c 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1296,6 +1296,9 @@ def : MnemonicAlias<"lret", "lretl">; def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>; def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>; +def : MnemonicAlias<"loopz", "loope">; +def : MnemonicAlias<"loopnz", "loopne">; + def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>; def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>; def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>; diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 1a58ba0..6a24d14 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -388,3 +388,8 @@ def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB; def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB; def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB; +let Defs = [RDX, RAX], Uses = [RCX] in + def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB; + +let Uses = [RDX, RAX, RCX] in + def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB; diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index e6dc74e..0e3b571 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -979,6 +979,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(BaseOpcode, CurByte, OS); EmitByte(0xF9, CurByte, OS); break; + case X86II::MRM_D0: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xD0, CurByte, OS); + break; + case X86II::MRM_D1: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xD1, CurByte, OS); + break; } // If there is a remaining operand, it must be a trailing immediate. Emit it diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index de76856..1ee7312 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -342,9 +342,10 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, assert((!Is64Bit || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); - // Stack alignment is 16 bytes on Darwin and Linux (both 32 and 64 bit) and - // for all 64-bit targets. - if (isTargetDarwin() || isTargetLinux() || Is64Bit) + // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both + // 32 and 64 bit) and for all 64-bit targets. + if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() || + isTargetSolaris() || Is64Bit) stackAlignment = 16; if (StackAlignment) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 8a119b4..0a62a02 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -166,6 +166,8 @@ public: bool hasVectorUAMem() const { return HasVectorUAMem; } bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; } + bool isTargetFreeBSD() const { return TargetTriple.getOS() == Triple::FreeBSD; } + bool isTargetSolaris() const { return TargetTriple.getOS() == Triple::Solaris; } // ELF is a reasonably sane default and the only other X86 targets we // support are Darwin and Windows. Just use "not those". |