diff options
author | rdivacky <rdivacky@FreeBSD.org> | 2009-10-23 14:19:52 +0000 |
---|---|---|
committer | rdivacky <rdivacky@FreeBSD.org> | 2009-10-23 14:19:52 +0000 |
commit | 9643cca39fb9fb3b49a8912926de98acf882283c (patch) | |
tree | 22cc59e4b240d84c3a5a60531119c4eca914a256 /lib/Target/X86 | |
parent | 1adacceba9c9ee0f16e54388e56c9a249b296f75 (diff) | |
download | FreeBSD-src-9643cca39fb9fb3b49a8912926de98acf882283c.zip FreeBSD-src-9643cca39fb9fb3b49a8912926de98acf882283c.tar.gz |
Update LLVM to r84949.
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp | 4 | ||||
-rw-r--r-- | lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h | 3 | ||||
-rw-r--r-- | lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 80 | ||||
-rw-r--r-- | lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 15 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 98 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 16 | ||||
-rw-r--r-- | lib/Target/X86/X86Instr64bit.td | 24 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFormats.td | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 19 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 33 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.cpp | 2 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.h | 17 |
12 files changed, 211 insertions, 104 deletions
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp index bc70ffe..8ec5b62 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp @@ -57,9 +57,7 @@ void X86ATTInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) { } } -void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - const char *Modifier) { - assert(Modifier == 0 && "Modifiers should not be used"); +void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h index 5f28fa4..3180618 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h @@ -32,8 +32,7 @@ public: static const char *getRegisterName(unsigned RegNo); - void printOperand(const MCInst *MI, unsigned OpNo, - const char *Modifier = 0); + void printOperand(const MCInst *MI, unsigned OpNo); void printMemReference(const MCInst *MI, unsigned Op); void printLeaMemReference(const MCInst *MI, unsigned Op); void printSSECC(const MCInst *MI, unsigned Op); diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index 2a0290d..ae8e6d3 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -225,7 +225,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { std::string Name = Mang->getMangledName(GV, Suffix, Suffix[0] != '\0'); if (Subtarget->isTargetCygMing()) { - X86COFFMachineModuleInfo &COFFMMI = + X86COFFMachineModuleInfo &COFFMMI = MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); COFFMMI.DecorateCygMingName(Name, GV, *TM.getTargetData()); } @@ -288,12 +288,12 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { std::string Name = Mang->makeNameProper(MO.getSymbolName()); if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) { Name += "$stub"; - MCSymbol *Sym = OutContext.GetOrCreateSymbol(Name); + MCSymbol *Sym = OutContext.GetOrCreateSymbol(StringRef(Name)); const MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); if (StubSym == 0) { Name.erase(Name.end()-5, Name.end()); - StubSym = OutContext.GetOrCreateSymbol(Name); + StubSym = OutContext.GetOrCreateSymbol(StringRef(Name)); } } @@ -870,49 +870,55 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // implementation of multiple entry points). If this doesn't occur, the // linker can safely perform dead code stripping. Since LLVM never // generates code that does this, it is always safe to set. - O << "\t.subsections_via_symbols\n"; - } - - if (Subtarget->isTargetCOFF()) { - // Necessary for dllexport support - std::vector<std::string> DLLExportedFns, DLLExportedGlobals; + OutStreamer.EmitAssemblerFlag(MCStreamer::SubsectionsViaSymbols); + } - X86COFFMachineModuleInfo &COFFMMI = + if (Subtarget->isTargetCOFF()) { + X86COFFMachineModuleInfo &COFFMMI = MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - TargetLoweringObjectFileCOFF &TLOFCOFF = - static_cast<TargetLoweringObjectFileCOFF&>(getObjFileLowering()); - for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) - if (I->hasDLLExportLinkage()) - DLLExportedFns.push_back(Mang->getMangledName(I)); - - for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - if (I->hasDLLExportLinkage()) - DLLExportedGlobals.push_back(Mang->getMangledName(I)); - - if (Subtarget->isTargetCygMing()) { - // Emit type information for external functions - for (X86COFFMachineModuleInfo::stub_iterator I = COFFMMI.stub_begin(), + // Emit type information for external functions + for (X86COFFMachineModuleInfo::stub_iterator I = COFFMMI.stub_begin(), E = COFFMMI.stub_end(); I != E; ++I) { - O << "\t.def\t " << I->getKeyData() + O << "\t.def\t " << I->getKeyData() << ";\t.scl\t" << COFF::C_EXT << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT) << ";\t.endef\n"; - } } - - // Output linker support code for dllexported globals on windows. - if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) { - OutStreamer.SwitchSection(TLOFCOFF.getCOFFSection(".section .drectve", - true, + + if (Subtarget->isTargetCygMing()) { + // Necessary for dllexport support + std::vector<std::string> DLLExportedFns, DLLExportedGlobals; + + TargetLoweringObjectFileCOFF &TLOFCOFF = + static_cast<TargetLoweringObjectFileCOFF&>(getObjFileLowering()); + + for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) + if (I->hasDLLExportLinkage()) { + std::string Name = Mang->getMangledName(I); + COFFMMI.DecorateCygMingName(Name, I, *TM.getTargetData()); + DLLExportedFns.push_back(Name); + } + + for (Module::const_global_iterator I = M.global_begin(), + E = M.global_end(); I != E; ++I) + if (I->hasDLLExportLinkage()) { + std::string Name = Mang->getMangledName(I); + COFFMMI.DecorateCygMingName(Name, I, *TM.getTargetData()); + DLLExportedGlobals.push_back(Mang->getMangledName(I)); + } + + // Output linker support code for dllexported globals on windows. + if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) { + OutStreamer.SwitchSection(TLOFCOFF.getCOFFSection(".section .drectve", + true, SectionKind::getMetadata())); - - for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) - O << "\t.ascii \" -export:" << DLLExportedGlobals[i] << ",data\"\n"; - - for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) - O << "\t.ascii \" -export:" << DLLExportedFns[i] << "\"\n"; + for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) + O << "\t.ascii \" -export:" << DLLExportedGlobals[i] << ",data\"\n"; + + for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) + O << "\t.ascii \" -export:" << DLLExportedFns[i] << "\"\n"; + } } } } diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index 5ccddf5..d498c57 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -38,10 +38,8 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { MCSymbol *X86MCInstLower::GetPICBaseSymbol() const { - SmallString<60> Name; - raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix() - << AsmPrinter.getFunctionNumber() << "$pb"; - return Ctx.GetOrCreateSymbol(Name.str()); + return Ctx.GetOrCreateSymbol(Twine(AsmPrinter.MAI->getPrivateGlobalPrefix())+ + Twine(AsmPrinter.getFunctionNumber())+"$pb"); } @@ -308,6 +306,8 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { MI->dump(); llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) continue; MCOp = MCOperand::CreateReg(MO.getReg()); break; case MachineOperand::MO_Immediate: @@ -449,10 +449,9 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { // MYGLOBAL + (. - PICBASE) // However, we can't generate a ".", so just emit a new label here and refer // to it. We know that this operand flag occurs at most once per function. - SmallString<64> Name; - raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() - << "picbaseref" << getFunctionNumber(); - MCSymbol *DotSym = OutContext.GetOrCreateSymbol(Name.str()); + const char *Prefix = MAI->getPrivateGlobalPrefix(); + MCSymbol *DotSym = OutContext.GetOrCreateSymbol(Twine(Prefix)+"picbaseref"+ + Twine(getFunctionNumber())); OutStreamer.EmitLabel(DotSym); // Now that we have emitted the label, lower the complex operand expression. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index fadc818..e5e7bc8 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2286,6 +2286,8 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP, case ISD::SETNE: return X86::COND_NE; case ISD::SETUO: return X86::COND_P; case ISD::SETO: return X86::COND_NP; + case ISD::SETOEQ: + case ISD::SETUNE: return X86::COND_INVALID; } } @@ -2389,6 +2391,56 @@ bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) { return ::isPSHUFLWMask(M, N->getValueType(0)); } +/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that +/// is suitable for input to PALIGNR. +static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT, + bool hasSSSE3) { + int i, e = VT.getVectorNumElements(); + + // Do not handle v2i64 / v2f64 shuffles with palignr. + if (e < 4 || !hasSSSE3) + return false; + + for (i = 0; i != e; ++i) + if (Mask[i] >= 0) + break; + + // All undef, not a palignr. + if (i == e) + return false; + + // Determine if it's ok to perform a palignr with only the LHS, since we + // don't have access to the actual shuffle elements to see if RHS is undef. + bool Unary = Mask[i] < (int)e; + bool NeedsUnary = false; + + int s = Mask[i] - i; + + // Check the rest of the elements to see if they are consecutive. + for (++i; i != e; ++i) { + int m = Mask[i]; + if (m < 0) + continue; + + Unary = Unary && (m < (int)e); + NeedsUnary = NeedsUnary || (m < s); + + if (NeedsUnary && !Unary) + return false; + if (Unary && m != ((s+i) & (e-1))) + return false; + if (!Unary && m != (s+i)) + return false; + } + return true; +} + +bool X86::isPALIGNRMask(ShuffleVectorSDNode *N) { + SmallVector<int, 8> M; + N->getMask(M); + return ::isPALIGNRMask(M, N->getValueType(0), true); +} + /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to SHUFP*. static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) { @@ -2733,8 +2785,7 @@ bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) { } /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle -/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* -/// instructions. +/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions. unsigned X86::getShuffleSHUFImmediate(SDNode *N) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); int NumOperands = SVOp->getValueType(0).getVectorNumElements(); @@ -2753,8 +2804,7 @@ unsigned X86::getShuffleSHUFImmediate(SDNode *N) { } /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle -/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW -/// instructions. +/// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction. unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); unsigned Mask = 0; @@ -2770,8 +2820,7 @@ unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { } /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle -/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW -/// instructions. +/// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction. unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); unsigned Mask = 0; @@ -2786,6 +2835,23 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { return Mask; } +/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle +/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction. +unsigned X86::getShufflePALIGNRImmediate(SDNode *N) { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + EVT VVT = N->getValueType(0); + unsigned EltSize = VVT.getVectorElementType().getSizeInBits() >> 3; + int Val = 0; + + unsigned i, e; + for (i = 0, e = VVT.getVectorNumElements(); i != e; ++i) { + Val = SVOp->getMaskElt(i); + if (Val >= 0) + break; + } + return (Val - i) * EltSize; +} + /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. bool X86::isZeroNode(SDValue Elt) { @@ -5502,6 +5568,8 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); + if (X86CC == X86::COND_INVALID) + return SDValue(); SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG); return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, @@ -5650,8 +5718,11 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); SDValue CC; - if (Cond.getOpcode() == ISD::SETCC) - Cond = LowerSETCC(Cond, DAG); + if (Cond.getOpcode() == ISD::SETCC) { + SDValue NewCond = LowerSETCC(Cond, DAG); + if (NewCond.getNode()) + Cond = NewCond; + } // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. @@ -5724,8 +5795,11 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); SDValue CC; - if (Cond.getOpcode() == ISD::SETCC) - Cond = LowerSETCC(Cond, DAG); + if (Cond.getOpcode() == ISD::SETCC) { + SDValue NewCond = LowerSETCC(Cond, DAG); + if (NewCond.getNode()) + Cond = NewCond; + } #if 0 // FIXME: LowerXALUO doesn't handle these!! else if (Cond.getOpcode() == X86ISD::ADD || @@ -6274,6 +6348,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG); + assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!"); SDValue Cond = DAG.getNode(Opc, dl, MVT::i32, LHS, RHS); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(X86CC, MVT::i8), Cond); @@ -7274,7 +7349,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, if (VT.getSizeInBits() == 64) return false; - // FIXME: pshufb, blends, palignr, shifts. + // FIXME: pshufb, blends, shifts. return (VT.getVectorNumElements() == 2 || ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isMOVLMask(M, VT) || @@ -7282,6 +7357,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, isPSHUFDMask(M, VT) || isPSHUFHWMask(M, VT) || isPSHUFLWMask(M, VT) || + isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) || isUNPCKLMask(M, VT) || isUNPCKHMask(M, VT) || isUNPCKL_v_undef_Mask(M, VT) || diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 2f7b8ba..66a9107 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -323,21 +323,27 @@ namespace llvm { /// specifies a shuffle of elements that is suitable for input to MOVDDUP. bool isMOVDDUPMask(ShuffleVectorSDNode *N); + /// isPALIGNRMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a shuffle of elements that is suitable for input to PALIGNR. + bool isPALIGNRMask(ShuffleVectorSDNode *N); + /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* /// instructions. unsigned getShuffleSHUFImmediate(SDNode *N); /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle - /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW - /// instructions. + /// the specified VECTOR_SHUFFLE mask with PSHUFHW instruction. unsigned getShufflePSHUFHWImmediate(SDNode *N); - /// getShufflePSHUFKWImmediate - Return the appropriate immediate to shuffle - /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW - /// instructions. + /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle + /// the specified VECTOR_SHUFFLE mask with PSHUFLW instruction. unsigned getShufflePSHUFLWImmediate(SDNode *N); + /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle + /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction. + unsigned getShufflePALIGNRImmediate(SDNode *N); + /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. bool isZeroNode(SDValue Elt); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index ef19823..c1b7b8f 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -368,19 +368,15 @@ def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), // Use movzbl instead of movzbq when the destination is a register; it's // equivalent due to implicit zero-extending, and it has a smaller encoding. def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), - "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR64:$dst, (zext GR8:$src))]>, TB; + "", [(set GR64:$dst, (zext GR8:$src))]>, TB; def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), - "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB; + "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB; // Use movzwl instead of movzwq when the destination is a register; it's // equivalent due to implicit zero-extending, and it has a smaller encoding. def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), - "movz{wl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR64:$dst, (zext GR16:$src))]>, TB; + "", [(set GR64:$dst, (zext GR16:$src))]>, TB; def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), - "movz{wl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB; + "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB; // There's no movzlq instruction, but movl can be used for this purpose, using // implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero @@ -390,11 +386,9 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), // necessarily all zero. In such cases, we fall back to these explicit zext // instructions. def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src), - "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR64:$dst, (zext GR32:$src))]>; + "", [(set GR64:$dst, (zext GR32:$src))]>; def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), - "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR64:$dst, (zextloadi64i32 addr:$src))]>; + "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>; // Any instruction that defines a 32-bit result leaves the high half of the // register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may @@ -1455,8 +1449,7 @@ def : Pat<(i64 0), // Materialize i64 constant where top 32-bits are zero. let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), - "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR64:$dst, i64immZExt32:$src)]>; + "", [(set GR64:$dst, i64immZExt32:$src)]>; //===----------------------------------------------------------------------===// // Thread Local Storage Instructions @@ -1515,6 +1508,7 @@ def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val) } // Optimized codegen when the non-memory output is not used. +let Defs = [EFLAGS] in { // FIXME: Use normal add / sub instructions and add lock prefix dynamically. def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "lock\n\t" @@ -1544,7 +1538,7 @@ def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "lock\n\t" "dec{q}\t$dst", []>, LOCK; - +} // Atomic exchange, and, or, xor let Constraints = "$val = $dst", Defs = [EFLAGS], usesCustomDAGSchedInserter = 1 in { diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index abdb313..2f14bb0 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -224,10 +224,10 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3]>; + : Ii8<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3]>; class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3]>; + : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3]>; // SSE4.1 Instruction Templates: // diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 30b57d8..16b2af7 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -3392,11 +3392,9 @@ def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2), // of the register here. This has a smaller encoding and avoids a // partial-register update. def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src), - "movs{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR16:$dst, (sext GR8:$src))]>, TB; + "", [(set GR16:$dst, (sext GR8:$src))]>, TB; def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src), - "movs{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB; + "", [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB; def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (sext GR8:$src))]>, TB; @@ -3414,11 +3412,9 @@ def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), // of the register here. This has a smaller encoding and avoids a // partial-register update. def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src), - "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR16:$dst, (zext GR8:$src))]>, TB; + "", [(set GR16:$dst, (zext GR8:$src))]>, TB; def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src), - "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB; + "", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB; def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zext GR8:$src))]>, TB; @@ -3474,9 +3470,8 @@ def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), [(set GR8:$dst, 0)]>; // Use xorl instead of xorw since we don't care about the high 16 bits, // it's smaller, and it avoids a partial-register update. -def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), - "xor{l}\t${dst:subreg32}, ${dst:subreg32}", - [(set GR16:$dst, 0)]>; +def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), + "", [(set GR16:$dst, 0)]>; def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "xor{l}\t$dst, $dst", [(set GR32:$dst, 0)]>; @@ -3598,6 +3593,7 @@ def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val), // Optimized codegen when the non-memory output is not used. // FIXME: Use normal add / sub instructions and add lock prefix dynamically. +let Defs = [EFLAGS] in { def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), "lock\n\t" "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; @@ -3667,6 +3663,7 @@ def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "lock\n\t" "dec{l}\t$dst", []>, LOCK; +} // Atomic exchange, and, or, xor let Constraints = "$val = $dst", Defs = [EFLAGS], diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 96fc932..f4e97c9 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -197,6 +197,12 @@ def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{ return getI8Imm(X86::getShufflePSHUFLWImmediate(N)); }]>; +// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to +// a PALIGNR imm. +def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{ + return getI8Imm(X86::getShufflePALIGNRImmediate(N)); +}]>; + def splat_lo : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); @@ -283,6 +289,11 @@ def pshuflw : PatFrag<(ops node:$lhs, node:$rhs), return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N)); }], SHUFFLE_get_pshuflw_imm>; +def palign : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N)); +}], SHUFFLE_get_palign_imm>; + //===----------------------------------------------------------------------===// // SSE scalar FP Instructions //===----------------------------------------------------------------------===// @@ -2062,6 +2073,7 @@ defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>; defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>; // Shuffle and unpack instructions +let AddedComplexity = 5 in { def PSHUFDri : PDIi8<0x70, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -2073,6 +2085,7 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem, [(set VR128:$dst, (v4i32 (pshufd:$src2 (bc_v4i32(memopv2i64 addr:$src1)), (undef))))]>; +} // SSE2 with ImmT == Imm8 and XS prefix. def PSHUFHWri : Ii8<0x70, MRMSrcReg, @@ -2839,6 +2852,26 @@ let Constraints = "$src1 = $dst" in { imm:$src3))]>, OpSize; } +// palignr patterns. +let AddedComplexity = 5 in { +def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)), + (PALIGNR128rr VR128:$src2, VR128:$src1, + (SHUFFLE_get_palign_imm VR128:$src3))>, + Requires<[HasSSSE3]>; +def : Pat<(v4f32 (palign:$src3 VR128:$src1, VR128:$src2)), + (PALIGNR128rr VR128:$src2, VR128:$src1, + (SHUFFLE_get_palign_imm VR128:$src3))>, + Requires<[HasSSSE3]>; +def : Pat<(v8i16 (palign:$src3 VR128:$src1, VR128:$src2)), + (PALIGNR128rr VR128:$src2, VR128:$src1, + (SHUFFLE_get_palign_imm VR128:$src3))>, + Requires<[HasSSSE3]>; +def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)), + (PALIGNR128rr VR128:$src2, VR128:$src1, + (SHUFFLE_get_palign_imm VR128:$src3))>, + Requires<[HasSSSE3]>; +} + def : Pat<(X86pshufb VR128:$src, VR128:$mask), (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>; def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index fb76aeb..9525f04 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -382,7 +382,6 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, , HasFMA4(false) , IsBTMemSlow(false) , DarwinVers(0) - , IsLinux(false) , stackAlignment(8) // FIXME: this is a known good value for Yonah. How about others? , MaxInlineSizeThreshold(128) @@ -434,7 +433,6 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, } else if (TT.find("linux") != std::string::npos) { // Linux doesn't imply ELF, but we don't currently support anything else. TargetType = isELF; - IsLinux = true; } else if (TT.find("cygwin") != std::string::npos) { TargetType = isCygwin; } else if (TT.find("mingw") != std::string::npos) { diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index a2e368d..e64b854 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -82,9 +82,6 @@ protected: /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc. unsigned char DarwinVers; // Is any darwin-x86 platform. - /// isLinux - true if this is a "linux" platform. - bool IsLinux; - /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -195,11 +192,7 @@ public: /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard, /// 10 = Snow Leopard, etc. unsigned getDarwinVers() const { return DarwinVers; } - - /// isLinux - Return true if the target is "Linux". - bool isLinux() const { return IsLinux; } - - + /// ClassifyGlobalReference - Classify a global variable reference for the /// current subtarget according to how we should reference it in a non-pcrel /// context. @@ -222,6 +215,14 @@ public: /// indicating the number of scheduling cycles of backscheduling that /// should be attempted. unsigned getSpecialAddressLatency() const; + + /// enablePostRAScheduler - X86 target is enabling post-alloc scheduling + /// at 'More' optimization level. + bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, + TargetSubtarget::AntiDepBreakMode& mode) const { + mode = TargetSubtarget::ANTIDEP_CRITICAL; + return OptLevel >= CodeGenOpt::Default; + } }; } // End llvm namespace |