Diffstat (limited to 'contrib/llvm/lib/Target')
280 files changed, 21468 insertions, 8315 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h index 2a1e8e4..1446bbb 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.h +++ b/contrib/llvm/lib/Target/ARM/ARM.h @@ -37,6 +37,7 @@ FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); +FunctionPass *createARMGlobalBaseRegPass(); FunctionPass *createARMGlobalMergePass(const TargetLowering* tli); FunctionPass *createARMConstantIslandPass(); FunctionPass *createMLxExpansionPass(); diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td index 69e2346..23974ad 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.td +++ b/contrib/llvm/lib/Target/ARM/ARM.td @@ -32,9 +32,6 @@ def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true", "Enable VFP3 instructions", [FeatureVFP2]>; -def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", - "Enable VFP4 instructions", - [FeatureVFP3]>; def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", "Enable NEON instructions", [FeatureVFP3]>; @@ -44,10 +41,16 @@ def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", "Does not support ARM mode execution">; def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", "Enable half-precision floating point">; +def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", + "Enable VFP4 instructions", + [FeatureVFP3, FeatureFP16]>; def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", "Restrict VFP3 to 16 double registers">; def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", "Enable divide instructions">; +def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", + "HasHardwareDivideInARM", "true", + "Enable divide instructions in ARM mode">; def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true", "Enable Thumb2 extract and pack instructions">; def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", @@ -139,6 +142,18 @@ def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", [FeatureVMLxForwarding, FeatureT2XtPk, FeatureFP16, FeatureAvoidPartialCPSR]>; +def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", + "Swift ARM processors", + [FeatureNEONForFP, FeatureT2XtPk, + FeatureVFP4, FeatureMP, FeatureHWDiv, + FeatureHWDivARM, FeatureAvoidPartialCPSR, + FeatureHasSlowFPVMLx]>; + +// FIXME: It has not been determined if A15 has these features. +def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", + "Cortex-A15 ARM processors", + [FeatureT2XtPk, FeatureFP16, + FeatureAvoidPartialCPSR]>; class ProcNoItin<string Name, list<SubtargetFeature> Features> : Processor<Name, NoItineraries, Features>; @@ -214,6 +229,10 @@ def : ProcessorModel<"cortex-a9-mp", CortexA9Model, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureMP, FeatureHasRAS]>; +// FIXME: A15 has currently the same ProcessorModel as A9. +def : ProcessorModel<"cortex-a15", CortexA9Model, + [ProcA15, HasV7Ops, FeatureNEON, FeatureDB, + FeatureDSPThumb2, FeatureHasRAS]>; // V7M Processors. def : ProcNoItin<"cortex-m3", [HasV7Ops, @@ -227,6 +246,12 @@ def : ProcNoItin<"cortex-m4", [HasV7Ops, FeatureT2XtPk, FeatureVFP4, FeatureVFPOnlySP, FeatureMClass]>; +// Swift uArch Processors. 
+def : ProcessorModel<"swift", SwiftModel, + [ProcSwift, HasV7Ops, FeatureNEON, + FeatureDB, FeatureDSPThumb2, + FeatureHasRAS]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index e9e2803..d439d1d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -23,6 +23,8 @@ #include "InstPrinter/ARMInstPrinter.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMMCExpr.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Constants.h" #include "llvm/DebugInfo.h" #include "llvm/Module.h" @@ -40,9 +42,8 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/SmallString.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -302,7 +303,7 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() { } void ARMAsmPrinter::EmitXXStructor(const Constant *CV) { - uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); + uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType()); assert(Size && "C++ constructor pointer had zero size!"); const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts()); @@ -389,16 +390,6 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, //===--------------------------------------------------------------------===// MCSymbol *ARMAsmPrinter:: -GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2, - const MachineBasicBlock *MBB) const { - SmallString<60> Name; - raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() - << getFunctionNumber() << '_' << uid << '_' << uid2 - << "_set_" << MBB->getNumber(); - return OutContext.GetOrCreateSymbol(Name.str()); -} - -MCSymbol *ARMAsmPrinter:: GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { SmallString<60> Name; raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "JTI" @@ -592,9 +583,24 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { const TargetLoweringObjectFileMachO &TLOFMacho = static_cast<const TargetLoweringObjectFileMachO &>( getObjFileLowering()); - OutStreamer.SwitchSection(TLOFMacho.getTextSection()); - OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection()); - OutStreamer.SwitchSection(TLOFMacho.getConstTextCoalSection()); + + // Collect the set of sections our functions will go into. + SetVector<const MCSection *, SmallVector<const MCSection *, 8>, + SmallPtrSet<const MCSection *, 8> > TextSections; + // Default text section comes first. + TextSections.insert(TLOFMacho.getTextSection()); + // Now any user defined text sections from function attributes. + for (Module::iterator F = M.begin(), e = M.end(); F != e; ++F) + if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage()) + TextSections.insert(TLOFMacho.SectionForGlobal(F, Mang, TM)); + // Now the coalescable sections. + TextSections.insert(TLOFMacho.getTextCoalSection()); + TextSections.insert(TLOFMacho.getConstTextCoalSection()); + + // Emit the sections in the .s file header to fix the order. 
+ for (unsigned i = 0, e = TextSections.size(); i != e; ++i) + OutStreamer.SwitchSection(TextSections[i]); + if (RelocM == Reloc::DynamicNoPIC) { const MCSection *sect = OutContext.getMachOSection("__TEXT", "__symbol_stub4", @@ -743,13 +749,28 @@ void ARMAsmPrinter::emitAttributes() { AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed); } else if (CPUString == "generic") { - // FIXME: Why these defaults? - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T); + // For a generic CPU, we assume a standard v7a architecture in Subtarget. + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::ApplicationProfile); AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use, ARMBuildAttrs::Allowed); AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::Allowed); - } + ARMBuildAttrs::AllowThumb32); + } else if (Subtarget->hasV7Ops()) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); + AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::AllowThumb32); + } else if (Subtarget->hasV6T2Ops()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6T2); + else if (Subtarget->hasV6Ops()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6); + else if (Subtarget->hasV5TEOps()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TE); + else if (Subtarget->hasV5TOps()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5T); + else if (Subtarget->hasV4TOps()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T); if (Subtarget->hasNEON() && emitFPU) { /* NEON is not exactly a VFP architecture, but GAS emit one of @@ -893,7 +914,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) { void ARMAsmPrinter:: EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { - int Size = TM.getTargetData()->getTypeAllocSize(MCPV->getType()); + int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType()); ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV); @@ -1091,16 +1112,6 @@ static void populateADROperands(MCInst &Inst, unsigned Dest, Inst.addOperand(MCOperand::CreateReg(ccreg)); } -void ARMAsmPrinter::EmitPatchedInstruction(const MachineInstr *MI, - unsigned Opcode) { - MCInst TmpInst; - - // Emit the instruction as usual, just patch the opcode. - LowerARMMachineInstrToMCInst(MI, TmpInst, *this); - TmpInst.setOpcode(Opcode); - OutStreamer.EmitInstruction(TmpInst); -} - void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { assert(MI->getFlag(MachineInstr::FrameSetup) && "Only instruction which are involved into frame setup code are allowed"); @@ -1402,31 +1413,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - case ARM::t2BMOVPCB_CALL: { - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - // Add predicate operands. 
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::t2B); - const GlobalValue *GV = MI->getOperand(0).getGlobal(); - MCSymbol *GVSym = Mang->getSymbol(GV); - const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); - TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr)); - // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - return; - } case ARM::MOVi16_ga_pcrel: case ARM::t2MOVi16_ga_pcrel: { MCInst TmpInst; diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h index 3555e8f5..c875b2c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h @@ -53,7 +53,7 @@ public: Subtarget = &TM.getSubtarget<ARMSubtarget>(); } - virtual const char *getPassName() const { + virtual const char *getPassName() const LLVM_OVERRIDE { return "ARM Assembly Printer"; } @@ -62,22 +62,24 @@ public: virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); + raw_ostream &O) LLVM_OVERRIDE; virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O); + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) LLVM_OVERRIDE; void EmitJumpTable(const MachineInstr *MI); void EmitJump2Table(const MachineInstr *MI); - virtual void EmitInstruction(const MachineInstr *MI); - bool runOnMachineFunction(MachineFunction &F); + virtual void EmitInstruction(const MachineInstr *MI) LLVM_OVERRIDE; + virtual bool runOnMachineFunction(MachineFunction &F) LLVM_OVERRIDE; - virtual void EmitConstantPool() {} // we emit constant pools customly! - virtual void EmitFunctionBodyEnd(); - virtual void EmitFunctionEntryLabel(); - void EmitStartOfAsmFile(Module &M); - void EmitEndOfAsmFile(Module &M); - void EmitXXStructor(const Constant *CV); + virtual void EmitConstantPool() LLVM_OVERRIDE { + // we emit constant pools customly! + } + virtual void EmitFunctionBodyEnd() LLVM_OVERRIDE; + virtual void EmitFunctionEntryLabel() LLVM_OVERRIDE; + virtual void EmitStartOfAsmFile(Module &M) LLVM_OVERRIDE; + virtual void EmitEndOfAsmFile(Module &M) LLVM_OVERRIDE; + virtual void EmitXXStructor(const Constant *CV) LLVM_OVERRIDE; // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); @@ -101,12 +103,13 @@ private: public: void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); - MachineLocation getDebugValueLocation(const MachineInstr *MI) const; + virtual MachineLocation + getDebugValueLocation(const MachineInstr *MI) const LLVM_OVERRIDE; /// EmitDwarfRegOp - Emit dwarf register operation. - virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const; + virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const LLVM_OVERRIDE; - virtual unsigned getISAEncoding() { + virtual unsigned getISAEncoding() LLVM_OVERRIDE { // ARM/Darwin adds ISA to the DWARF info for each function. 
if (!Subtarget->isTargetDarwin()) return 0; @@ -114,18 +117,19 @@ public: ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm; } +private: MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol); - MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2, - const MachineBasicBlock *MBB) const; MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const; MCSymbol *GetARMSJLJEHLabel(void) const; MCSymbol *GetARMGVSymbol(const GlobalValue *GV); +public: /// EmitMachineConstantPoolValue - Print a machine constantpool value to /// the .s file. - virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV); + virtual void + EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) LLVM_OVERRIDE; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 1cc5a17..3c7bb24 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -49,6 +49,11 @@ static cl::opt<bool> WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true), cl::desc("Widen ARM vmovs to vmovd when possible")); +static cl::opt<unsigned> +SwiftPartialUpdateClearance("swift-partial-update-clearance", + cl::Hidden, cl::init(12), + cl::desc("Clearance before partial register updates")); + /// ARM_MLxEntry - Record information about MLA / MLS instructions. struct ARM_MLxEntry { uint16_t MLxOpc; // MLA / MLS opcode @@ -683,7 +688,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Handle register classes that require multiple instructions. unsigned BeginIdx = 0; unsigned SubRegs = 0; - unsigned Spacing = 1; + int Spacing = 1; // Use VORRq when possible. if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) @@ -697,6 +702,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3; else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4; + else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) + Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2; else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2; @@ -705,27 +712,38 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2; - if (Opc) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MachineInstrBuilder Mov; - for (unsigned i = 0; i != SubRegs; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing); - unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing); - assert(Dst && Src && "Bad sub-register"); - Mov = AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) - .addReg(Src)); - // VORR takes two source operands. - if (Opc == ARM::VORRq) - Mov.addReg(Src); - } - // Add implicit super-register defs and kills to the last instruction. - Mov->addRegisterDefined(DestReg, TRI); - if (KillSrc) - Mov->addRegisterKilled(SrcReg, TRI); - return; - } + assert(Opc && "Impossible reg-to-reg copy"); + + const TargetRegisterInfo *TRI = &getRegisterInfo(); + MachineInstrBuilder Mov; - llvm_unreachable("Impossible reg-to-reg copy"); + // Copy register tuples backward when the first Dest reg overlaps with SrcReg. 
+ if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) { + BeginIdx = BeginIdx + ((SubRegs-1)*Spacing); + Spacing = -Spacing; + } +#ifndef NDEBUG + SmallSet<unsigned, 4> DstRegs; +#endif + for (unsigned i = 0; i != SubRegs; ++i) { + unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing); + unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing); + assert(Dst && Src && "Bad sub-register"); +#ifndef NDEBUG + assert(!DstRegs.count(Src) && "destructive vector copy"); + DstRegs.insert(Dst); +#endif + Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) + .addReg(Src); + // VORR takes two source operands. + if (Opc == ARM::VORRq) + Mov.addReg(Src); + Mov = AddDefaultPred(Mov); + } + // Add implicit super-register defs and kills to the last instruction. + Mov->addRegisterDefined(DestReg, TRI); + if (KillSrc) + Mov->addRegisterKilled(SrcReg, TRI); } static const @@ -775,6 +793,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA)) + .addFrameIndex(FI)) + .addMemOperand(MMO); + MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); + AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); } else llvm_unreachable("Unknown reg class!"); break; @@ -922,6 +947,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); MachineMemOperand *MMO = @@ -947,6 +973,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (ARM::DPRRegClass.hasSubClassEq(RC)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { + unsigned LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA : ARM::LDMIA; + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + MIB.addReg(DestReg, RegState::ImplicitDefine); } else llvm_unreachable("Unknown reg class!"); break; @@ -1378,7 +1413,6 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case ARM::VLDRD: case ARM::VLDRS: case ARM::t2LDRi8: - case ARM::t2LDRDi8: case ARM::t2LDRSHi8: case ARM::t2LDRi12: case ARM::t2LDRSHi12: @@ -1517,6 +1551,14 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB, return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost; } +bool +ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, + MachineBasicBlock &FMBB) const { + // Reduce false anti-dependencies to let Swift's out-of-order execution + // engine do its thing. + return Subtarget.isSwift(); +} + /// getInstrPredicate - If instruction is predicated, returns its predicate /// condition, otherwise returns AL. It also returns the condition code /// register by reference. 
@@ -1569,71 +1611,41 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { } /// Identify instructions that can be folded into a MOVCC instruction, and -/// return the corresponding opcode for the predicated pseudo-instruction. -static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI, - const MachineRegisterInfo &MRI) { +/// return the defining instruction. +static MachineInstr *canFoldIntoMOVCC(unsigned Reg, + const MachineRegisterInfo &MRI, + const TargetInstrInfo *TII) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) return 0; if (!MRI.hasOneNonDBGUse(Reg)) return 0; - MI = MRI.getVRegDef(Reg); + MachineInstr *MI = MRI.getVRegDef(Reg); if (!MI) return 0; + // MI is folded into the MOVCC by predicating it. + if (!MI->isPredicable()) + return 0; // Check if MI has any non-dead defs or physreg uses. This also detects // predicated instructions which will be reading CPSR. for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); + // Reject frame index operands, PEI can't handle the predicated pseudos. + if (MO.isFI() || MO.isCPI() || MO.isJTI()) + return 0; if (!MO.isReg()) continue; + // MI can't have any tied operands, that would conflict with predication. + if (MO.isTied()) + return 0; if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) return 0; if (MO.isDef() && !MO.isDead()) return 0; } - switch (MI->getOpcode()) { - default: return 0; - case ARM::ANDri: return ARM::ANDCCri; - case ARM::ANDrr: return ARM::ANDCCrr; - case ARM::ANDrsi: return ARM::ANDCCrsi; - case ARM::ANDrsr: return ARM::ANDCCrsr; - case ARM::t2ANDri: return ARM::t2ANDCCri; - case ARM::t2ANDrr: return ARM::t2ANDCCrr; - case ARM::t2ANDrs: return ARM::t2ANDCCrs; - case ARM::EORri: return ARM::EORCCri; - case ARM::EORrr: return ARM::EORCCrr; - case ARM::EORrsi: return ARM::EORCCrsi; - case ARM::EORrsr: return ARM::EORCCrsr; - case ARM::t2EORri: return ARM::t2EORCCri; - case ARM::t2EORrr: return ARM::t2EORCCrr; - case ARM::t2EORrs: return ARM::t2EORCCrs; - case ARM::ORRri: return ARM::ORRCCri; - case ARM::ORRrr: return ARM::ORRCCrr; - case ARM::ORRrsi: return ARM::ORRCCrsi; - case ARM::ORRrsr: return ARM::ORRCCrsr; - case ARM::t2ORRri: return ARM::t2ORRCCri; - case ARM::t2ORRrr: return ARM::t2ORRCCrr; - case ARM::t2ORRrs: return ARM::t2ORRCCrs; - - // ARM ADD/SUB - case ARM::ADDri: return ARM::ADDCCri; - case ARM::ADDrr: return ARM::ADDCCrr; - case ARM::ADDrsi: return ARM::ADDCCrsi; - case ARM::ADDrsr: return ARM::ADDCCrsr; - case ARM::SUBri: return ARM::SUBCCri; - case ARM::SUBrr: return ARM::SUBCCrr; - case ARM::SUBrsi: return ARM::SUBCCrsi; - case ARM::SUBrsr: return ARM::SUBCCrsr; - - // Thumb2 ADD/SUB - case ARM::t2ADDri: return ARM::t2ADDCCri; - case ARM::t2ADDri12: return ARM::t2ADDCCri12; - case ARM::t2ADDrr: return ARM::t2ADDCCrr; - case ARM::t2ADDrs: return ARM::t2ADDCCrs; - case ARM::t2SUBri: return ARM::t2SUBCCri; - case ARM::t2SUBri12: return ARM::t2SUBCCri12; - case ARM::t2SUBrr: return ARM::t2SUBCCrr; - case ARM::t2SUBrs: return ARM::t2SUBCCrs; - } + bool DontMoveAcrossStores = true; + if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores)) + return 0; + return MI; } bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI, @@ -1662,19 +1674,18 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && "Unknown select instruction"); const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); - 
MachineInstr *DefMI = 0; - unsigned Opc = canFoldIntoMOVCC(MI->getOperand(2).getReg(), DefMI, MRI); - bool Invert = !Opc; - if (!Opc) - Opc = canFoldIntoMOVCC(MI->getOperand(1).getReg(), DefMI, MRI); - if (!Opc) + MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this); + bool Invert = !DefMI; + if (!DefMI) + DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this); + if (!DefMI) return 0; // Create a new predicated version of DefMI. // Rfalse is the first use. MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - get(Opc), MI->getOperand(0).getReg()) - .addOperand(MI->getOperand(Invert ? 2 : 1)); + DefMI->getDesc(), + MI->getOperand(0).getReg()); // Copy all the DefMI operands, excluding its (null) predicate. const MCInstrDesc &DefDesc = DefMI->getDesc(); @@ -1693,6 +1704,15 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, if (NewMI->hasOptionalDef()) AddDefaultCC(NewMI); + // The output register value when the predicate is false is an implicit + // register operand tied to the first def. + // The tie makes the register allocator ensure the FalseReg is allocated the + // same register as operand 0. + MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); + FalseReg.setImplicit(); + NewMI->addOperand(FalseReg); + NewMI->tieOperands(0, NewMI->getNumOperands() - 1); + // The caller will erase MI, but not DefMI. DefMI->eraseFromParent(); return NewMI; @@ -2039,13 +2059,14 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // Masked compares sometimes use the same register as the corresponding 'and'. if (CmpMask != ~0) { - if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) { + if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) { MI = 0; for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg), UE = MRI->use_end(); UI != UE; ++UI) { if (UI->getParent() != CmpInstr->getParent()) continue; MachineInstr *PotentialAND = &*UI; - if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true)) + if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) || + isPredicated(PotentialAND)) continue; MI = PotentialAND; break; @@ -2111,6 +2132,10 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // The single candidate is called MI. if (!MI) MI = Sub; + // We can't use a predicated instruction - it doesn't always write the flags. + if (isPredicated(MI)) + return false; + switch (MI->getOpcode()) { default: break; case ARM::RSBrr: @@ -2217,6 +2242,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // Toggle the optional operand to CPSR. MI->getOperand(5).setReg(ARM::CPSR); MI->getOperand(5).setIsDef(true); + assert(!isPredicated(MI) && "Can't use flags from predicated instruction"); CmpInstr->eraseFromParent(); // Modify the condition code of operands in OperandsToUpdate. 
@@ -2347,6 +2373,260 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, return true; } +static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, + const MachineInstr *MI) { + switch (MI->getOpcode()) { + default: { + const MCInstrDesc &Desc = MI->getDesc(); + int UOps = ItinData->getNumMicroOps(Desc.getSchedClass()); + assert(UOps >= 0 && "bad # UOps"); + return UOps; + } + + case ARM::LDRrs: + case ARM::LDRBrs: + case ARM::STRrs: + case ARM::STRBrs: { + unsigned ShOpVal = MI->getOperand(3).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 1; + return 2; + } + + case ARM::LDRH: + case ARM::STRH: { + if (!MI->getOperand(2).getReg()) + return 1; + + unsigned ShOpVal = MI->getOperand(3).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 1; + return 2; + } + + case ARM::LDRSB: + case ARM::LDRSH: + return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2; + + case ARM::LDRSB_POST: + case ARM::LDRSH_POST: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + return (Rt == Rm) ? 4 : 3; + } + + case ARM::LDR_PRE_REG: + case ARM::LDRB_PRE_REG: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + if (Rt == Rm) + return 3; + unsigned ShOpVal = MI->getOperand(4).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 2; + return 3; + } + + case ARM::STR_PRE_REG: + case ARM::STRB_PRE_REG: { + unsigned ShOpVal = MI->getOperand(4).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 2; + return 3; + } + + case ARM::LDRH_PRE: + case ARM::STRH_PRE: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + if (!Rm) + return 2; + if (Rt == Rm) + return 3; + return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) + ? 3 : 2; + } + + case ARM::LDR_POST_REG: + case ARM::LDRB_POST_REG: + case ARM::LDRH_POST: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + return (Rt == Rm) ? 
3 : 2; + } + + case ARM::LDR_PRE_IMM: + case ARM::LDRB_PRE_IMM: + case ARM::LDR_POST_IMM: + case ARM::LDRB_POST_IMM: + case ARM::STRB_POST_IMM: + case ARM::STRB_POST_REG: + case ARM::STRB_PRE_IMM: + case ARM::STRH_POST: + case ARM::STR_POST_IMM: + case ARM::STR_POST_REG: + case ARM::STR_PRE_IMM: + return 2; + + case ARM::LDRSB_PRE: + case ARM::LDRSH_PRE: { + unsigned Rm = MI->getOperand(3).getReg(); + if (Rm == 0) + return 3; + unsigned Rt = MI->getOperand(0).getReg(); + if (Rt == Rm) + return 4; + unsigned ShOpVal = MI->getOperand(4).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 3; + return 4; + } + + case ARM::LDRD: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rn = MI->getOperand(2).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + if (Rm) + return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3; + return (Rt == Rn) ? 3 : 2; + } + + case ARM::STRD: { + unsigned Rm = MI->getOperand(3).getReg(); + if (Rm) + return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3; + return 2; + } + + case ARM::LDRD_POST: + case ARM::t2LDRD_POST: + return 3; + + case ARM::STRD_POST: + case ARM::t2STRD_POST: + return 4; + + case ARM::LDRD_PRE: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rn = MI->getOperand(3).getReg(); + unsigned Rm = MI->getOperand(4).getReg(); + if (Rm) + return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4; + return (Rt == Rn) ? 4 : 3; + } + + case ARM::t2LDRD_PRE: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rn = MI->getOperand(3).getReg(); + return (Rt == Rn) ? 4 : 3; + } + + case ARM::STRD_PRE: { + unsigned Rm = MI->getOperand(4).getReg(); + if (Rm) + return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4; + return 3; + } + + case ARM::t2STRD_PRE: + return 3; + + case ARM::t2LDR_POST: + case ARM::t2LDRB_POST: + case ARM::t2LDRB_PRE: + case ARM::t2LDRSBi12: + case ARM::t2LDRSBi8: + case ARM::t2LDRSBpci: + case ARM::t2LDRSBs: + case ARM::t2LDRH_POST: + case ARM::t2LDRH_PRE: + case ARM::t2LDRSBT: + case ARM::t2LDRSB_POST: + case ARM::t2LDRSB_PRE: + case ARM::t2LDRSH_POST: + case ARM::t2LDRSH_PRE: + case ARM::t2LDRSHi12: + case ARM::t2LDRSHi8: + case ARM::t2LDRSHpci: + case ARM::t2LDRSHs: + return 2; + + case ARM::t2LDRDi8: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rn = MI->getOperand(2).getReg(); + return (Rt == Rn) ? 3 : 2; + } + + case ARM::t2STRB_POST: + case ARM::t2STRB_PRE: + case ARM::t2STRBs: + case ARM::t2STRDi8: + case ARM::t2STRH_POST: + case ARM::t2STRH_PRE: + case ARM::t2STRHs: + case ARM::t2STR_POST: + case ARM::t2STR_PRE: + case ARM::t2STRs: + return 2; + } +} + +// Return the number of 32-bit words loaded by LDM or stored by STM. If this +// can't be easily determined return 0 (missing MachineMemOperand). +// +// FIXME: The current MachineInstr design does not support relying on machine +// mem operands to determine the width of a memory access. Instead, we expect +// the target to provide this information based on the instruction opcode and +// operands. However, using MachineMemOperand is the best solution now for +// two reasons: +// +// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI +// operands.
This is much more dangerous than using the MachineMemOperand +// sizes because CodeGen passes can insert/remove optional machine operands. In +// fact, it's totally incorrect for preRA passes and appears to be wrong for +// postRA passes as well. +// +// 2) getNumLDMAddresses is only used by the scheduling machine model and any +// machine model that calls this should handle the unknown (zero size) case. +// +// Long term, we should require a target hook that verifies MachineMemOperand +// sizes during MC lowering. That target hook should be local to MC lowering +// because we can't ensure that it is aware of other MI forms. Doing this will +// ensure that MachineMemOperands are correctly propagated through all passes. +unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const { + unsigned Size = 0; + for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), + E = MI->memoperands_end(); I != E; ++I) { + Size += (*I)->getSize(); + } + return Size / 4; +} + unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr *MI) const { @@ -2356,8 +2636,12 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, const MCInstrDesc &Desc = MI->getDesc(); unsigned Class = Desc.getSchedClass(); int ItinUOps = ItinData->getNumMicroOps(Class); - if (ItinUOps >= 0) + if (ItinUOps >= 0) { + if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore())) + return getNumMicroOpsSwiftLdSt(ItinData, MI); + return ItinUOps; + } unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -2426,7 +2710,43 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, case ARM::t2STMIA_UPD: case ARM::t2STMDB_UPD: { unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1; - if (Subtarget.isCortexA8()) { + if (Subtarget.isSwift()) { + // rdar://8402126 + int UOps = 1 + NumRegs; // One for address computation, one for each ld / st. + switch (Opc) { + default: break; + case ARM::VLDMDIA_UPD: + case ARM::VLDMDDB_UPD: + case ARM::VLDMSIA_UPD: + case ARM::VLDMSDB_UPD: + case ARM::VSTMDIA_UPD: + case ARM::VSTMDDB_UPD: + case ARM::VSTMSIA_UPD: + case ARM::VSTMSDB_UPD: + case ARM::LDMIA_UPD: + case ARM::LDMDA_UPD: + case ARM::LDMDB_UPD: + case ARM::LDMIB_UPD: + case ARM::STMIA_UPD: + case ARM::STMDA_UPD: + case ARM::STMDB_UPD: + case ARM::STMIB_UPD: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: + case ARM::t2LDMIA_UPD: + case ARM::t2LDMDB_UPD: + case ARM::t2STMIA_UPD: + case ARM::t2STMDB_UPD: + ++UOps; // One for base register writeback. + break; + case ARM::LDMIA_RET: + case ARM::tPOP_RET: + case ARM::t2LDMIA_RET: + UOps += 2; // One for base reg wb, one for write to pc. + break; + } + return UOps; + } else if (Subtarget.isCortexA8()) { if (NumRegs < 4) return 2; // 4 registers would be issued: 2, 2. @@ -2435,7 +2755,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, if (NumRegs % 2) ++A8UOps; return A8UOps; - } else if (Subtarget.isCortexA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { int A9UOps = (NumRegs / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. 
@@ -2468,7 +2788,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, DefCycle = RegNo / 2 + 1; if (RegNo % 2) ++DefCycle; - } else if (Subtarget.isCortexA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { DefCycle = RegNo; bool isSLoad = false; @@ -2512,7 +2832,7 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, DefCycle = 1; // Result latency is issue cycle + 2: E2. DefCycle += 2; - } else if (Subtarget.isCortexA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { DefCycle = (RegNo / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. @@ -2543,7 +2863,7 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, UseCycle = RegNo / 2 + 1; if (RegNo % 2) ++UseCycle; - } else if (Subtarget.isCortexA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { UseCycle = RegNo; bool isSStore = false; @@ -2584,7 +2904,7 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, UseCycle = 2; // Read in E3. UseCycle += 2; - } else if (Subtarget.isCortexA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { UseCycle = (RegNo / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. @@ -2769,7 +3089,7 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget, const MachineInstr *DefMI, const MCInstrDesc *DefMCID, unsigned DefAlign) { int Adjust = 0; - if (Subtarget.isCortexA8() || Subtarget.isCortexA9()) { + if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. switch (DefMCID->getOpcode()) { @@ -2794,9 +3114,40 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget, break; } } + } else if (Subtarget.isSwift()) { + // FIXME: Properly handle all of the latency adjustments for address + // writeback. + switch (DefMCID->getOpcode()) { + default: break; + case ARM::LDRrs: + case ARM::LDRBrs: { + unsigned ShOpVal = DefMI->getOperand(3).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + Adjust -= 2; + else if (!isSub && + ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) + --Adjust; + break; + } + case ARM::t2LDRs: + case ARM::t2LDRBs: + case ARM::t2LDRHs: + case ARM::t2LDRSHs: { + // Thumb2 mode: lsl only. + unsigned ShAmt = DefMI->getOperand(3).getImm(); + if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3) + Adjust -= 2; + break; + } + } } - if (DefAlign < 8 && Subtarget.isCortexA9()) { + if (DefAlign < 8 && Subtarget.isLikeA9()) { switch (DefMCID->getOpcode()) { default: break; case ARM::VLD1q8: @@ -2954,7 +3305,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, if (Reg == ARM::CPSR) { if (DefMI->getOpcode() == ARM::FMSTAT) { // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) - return Subtarget.isCortexA9() ? 1 : 20; + return Subtarget.isLikeA9() ? 1 : 20; } // CPSR set and branch can be paired in the same cycle. @@ -2970,7 +3321,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, // instructions). 
if (Latency > 0 && Subtarget.isThumb2()) { const MachineFunction *MF = DefMI->getParent()->getParent(); - if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + if (MF->getFunction()->getFnAttributes(). + hasAttribute(Attributes::OptimizeForSize)) --Latency; } return Latency; @@ -3020,7 +3372,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, if (!UseNode->isMachineOpcode()) { int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); - if (Subtarget.isCortexA9()) + if (Subtarget.isLikeA9() || Subtarget.isSwift()) return Latency <= 2 ? 1 : Latency - 1; else return Latency <= 3 ? 1 : Latency - 2; @@ -3037,7 +3389,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, UseMCID, UseIdx, UseAlign); if (Latency > 1 && - (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { + (Subtarget.isCortexA8() || Subtarget.isLikeA9())) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. switch (DefMCID.getOpcode()) { @@ -3064,9 +3416,36 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, break; } } + } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) { + // FIXME: Properly handle all of the latency adjustments for address + // writeback. + switch (DefMCID.getOpcode()) { + default: break; + case ARM::LDRrs: + case ARM::LDRBrs: { + unsigned ShOpVal = + cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) + Latency -= 2; + else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) + --Latency; + break; + } + case ARM::t2LDRs: + case ARM::t2LDRBs: + case ARM::t2LDRHs: + case ARM::t2LDRSHs: { + // Thumb2 mode: lsl 0-3 only. + Latency -= 2; + break; + } + } } - if (DefAlign < 8 && Subtarget.isCortexA9()) + if (DefAlign < 8 && Subtarget.isLikeA9()) switch (DefMCID.getOpcode()) { default: break; case ARM::VLD1q8: @@ -3190,18 +3569,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; } -unsigned -ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *DepMI) const { - unsigned Reg = DefMI->getOperand(DefIdx).getReg(); - if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI)) - return 1; - - // If the second MI is predicated, then there is an implicit use dependency. - return getInstrLatency(ItinData, DefMI); -} - unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, unsigned *PredCost) const { @@ -3359,11 +3726,12 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); - // Cortex-A9 is particularly picky about mixing the two and wants these + // A9-like cores are particularly picky about mixing the two and want these // converted. - if (Subtarget.isCortexA9() && !isPredicated(MI) && + if (Subtarget.isLikeA9() && !isPredicated(MI) && (MI->getOpcode() == ARM::VMOVRS || - MI->getOpcode() == ARM::VMOVSR)) + MI->getOpcode() == ARM::VMOVSR || + MI->getOpcode() == ARM::VMOVS)) return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); // No other instructions can be swizzled, so just determine their domain. 
@@ -3383,13 +3751,70 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { return std::make_pair(ExeGeneric, 0); } +static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, + unsigned SReg, unsigned &Lane) { + unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass); + Lane = 0; + + if (DReg != ARM::NoRegister) + return DReg; + + Lane = 1; + DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass); + + assert(DReg && "S-register with no D super-register?"); + return DReg; +} + +/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, +/// set ImplicitSReg to a register number that must be marked as implicit-use or +/// zero if no register needs to be defined as implicit-use. +/// +/// If the function cannot determine if an SPR should be marked implicit use or +/// not, it returns false. +/// +/// This function handles cases where an instruction is being modified from taking +/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict +/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other +/// lane of the DPR). +/// +/// If the other SPR is defined, an implicit-use of it should be added. Else, +/// (including the case where the DPR itself is defined), it should not. +/// +static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, + MachineInstr *MI, + unsigned DReg, unsigned Lane, + unsigned &ImplicitSReg) { + // If the DPR is defined or used already, the other SPR lane will be chained + // correctly, so there is nothing to be done. + if (MI->definesRegister(DReg, TRI) || MI->readsRegister(DReg, TRI)) { + ImplicitSReg = 0; + return true; + } + + // Otherwise we need to go searching to see if the SPR is set explicitly. + ImplicitSReg = TRI->getSubReg(DReg, + (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1); + MachineBasicBlock::LivenessQueryResult LQR = + MI->getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI); + + if (LQR == MachineBasicBlock::LQR_Live) + return true; + else if (LQR == MachineBasicBlock::LQR_Unknown) + return false; + + // If the register is known not to be live, there is no need to add an + // implicit-use. + ImplicitSReg = 0; + return true; +} + void ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { unsigned DstReg, SrcReg, DReg; unsigned Lane; MachineInstrBuilder MIB(MI); const TargetRegisterInfo *TRI = &getRegisterInfo(); - bool isKill; switch (MI->getOpcode()) { default: llvm_unreachable("cannot handle opcode!"); @@ -3400,82 +3825,294 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { // Zap the predicate operands. assert(!isPredicated(MI) && "Cannot predicate a VORRd"); - MI->RemoveOperand(3); - MI->RemoveOperand(2); - // Change to a VORRd which requires two identical use operands. - MI->setDesc(get(ARM::VORRd)); + // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); - // Add the extra source operand and new predicates. - // This will go before any implicit ops. 
- AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); + + // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) + MI->setDesc(get(ARM::VORRd)); + AddDefaultPred(MIB.addReg(DstReg, RegState::Define) + .addReg(SrcReg) + .addReg(SrcReg)); break; case ARM::VMOVRS: if (Domain != ExeNEON) break; assert(!isPredicated(MI) && "Cannot predicate a VGETLN"); + // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits) DstReg = MI->getOperand(0).getReg(); SrcReg = MI->getOperand(1).getReg(); - DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass); - Lane = 0; - if (DReg == ARM::NoRegister) { - DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass); - Lane = 1; - assert(DReg && "S-register with no D super-register?"); - } + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); - MI->RemoveOperand(3); - MI->RemoveOperand(2); - MI->RemoveOperand(1); + DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane); + // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps) + // Note that DSrc has been widened and the other lane may be undef, which + // contaminates the entire register. MI->setDesc(get(ARM::VGETLNi32)); - MIB.addReg(DReg); - MIB.addImm(Lane); + AddDefaultPred(MIB.addReg(DstReg, RegState::Define) + .addReg(DReg, RegState::Undef) + .addImm(Lane)); - MIB->getOperand(1).setIsUndef(); + // The old source should be an implicit use, otherwise we might think it + // was dead before here. MIB.addReg(SrcReg, RegState::Implicit); - - AddDefaultPred(MIB); break; - case ARM::VMOVSR: + case ARM::VMOVSR: { if (Domain != ExeNEON) break; assert(!isPredicated(MI) && "Cannot predicate a VSETLN"); + // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits) DstReg = MI->getOperand(0).getReg(); SrcReg = MI->getOperand(1).getReg(); - DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass); - Lane = 0; - if (DReg == ARM::NoRegister) { - DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass); - Lane = 1; - assert(DReg && "S-register with no D super-register?"); - } - isKill = MI->getOperand(0).isKill(); - MI->RemoveOperand(3); - MI->RemoveOperand(2); - MI->RemoveOperand(1); - MI->RemoveOperand(0); + DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane); + + unsigned ImplicitSReg; + if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg)) + break; + + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); + // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps) + // Again DDst may be undefined at the beginning of this instruction. MI->setDesc(get(ARM::VSETLNi32)); - MIB.addReg(DReg); - MIB.addReg(DReg); - MIB.addReg(SrcReg); - MIB.addImm(Lane); + MIB.addReg(DReg, RegState::Define) + .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI))) + .addReg(SrcReg) + .addImm(Lane); + AddDefaultPred(MIB); + + // The narrower destination must be marked as set to keep previous chains + // in place. 
+ MIB.addReg(DstReg, RegState::Define | RegState::Implicit); + if (ImplicitSReg != 0) + MIB.addReg(ImplicitSReg, RegState::Implicit); + break; + } + case ARM::VMOVS: { + if (Domain != ExeNEON) + break; + + // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + + unsigned DstLane = 0, SrcLane = 0, DDst, DSrc; + DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane); + DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane); - MIB->getOperand(1).setIsUndef(); + unsigned ImplicitSReg; + if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg)) + break; - if (isKill) - MIB->addRegisterKilled(DstReg, TRI, true); - MIB->addRegisterDefined(DstReg, TRI); + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); + + if (DSrc == DDst) { + // Destination can be: + // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits) + MI->setDesc(get(ARM::VDUPLN32d)); + MIB.addReg(DDst, RegState::Define) + .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI))) + .addImm(SrcLane); + AddDefaultPred(MIB); + + // Neither the source or the destination are naturally represented any + // more, so add them in manually. + MIB.addReg(DstReg, RegState::Implicit | RegState::Define); + MIB.addReg(SrcReg, RegState::Implicit); + if (ImplicitSReg != 0) + MIB.addReg(ImplicitSReg, RegState::Implicit); + break; + } + // In general there's no single instruction that can perform an S <-> S + // move in NEON space, but a pair of VEXT instructions *can* do the + // job. It turns out that the VEXTs needed will only use DSrc once, with + // the position based purely on the combination of lane-0 and lane-1 + // involved. For example + // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1 + // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1 + // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1 + // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1 + // + // Pattern of the MachineInstrs is: + // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits) + MachineInstrBuilder NewMIB; + NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + get(ARM::VEXTd32), DDst); + + // On the first instruction, both DSrc and DDst may be <undef> if present. + // Specifically when the original instruction didn't have them as an + // <imp-use>. + unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst; + bool CurUndef = !MI->readsRegister(CurReg, TRI); + NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); + + CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst; + CurUndef = !MI->readsRegister(CurReg, TRI); + NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); + + NewMIB.addImm(1); + AddDefaultPred(NewMIB); + + if (SrcLane == DstLane) + NewMIB.addReg(SrcReg, RegState::Implicit); + + MI->setDesc(get(ARM::VEXTd32)); + MIB.addReg(DDst, RegState::Define); + + // On the second instruction, DDst has definitely been defined above, so + // it is not <undef>. DSrc, if present, can be <undef> as above. + CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst; + CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI); + MIB.addReg(CurReg, getUndefRegState(CurUndef)); + + CurReg = SrcLane == 0 && DstLane == 1 ? 
DSrc : DDst; + CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI); + MIB.addReg(CurReg, getUndefRegState(CurUndef)); + + MIB.addImm(1); AddDefaultPred(MIB); + + if (SrcLane != DstLane) + MIB.addReg(SrcReg, RegState::Implicit); + + // As before, the original destination is no longer represented, add it + // implicitly. + MIB.addReg(DstReg, RegState::Define | RegState::Implicit); + if (ImplicitSReg != 0) + MIB.addReg(ImplicitSReg, RegState::Implicit); + break; + } + } + +} + +//===----------------------------------------------------------------------===// +// Partial register updates +//===----------------------------------------------------------------------===// +// +// Swift renames NEON registers with 64-bit granularity. That means any +// instruction writing an S-reg implicitly reads the containing D-reg. The +// problem is mostly avoided by translating f32 operations to v2f32 operations +// on D-registers, but f32 loads are still a problem. +// +// These instructions can load an f32 into a NEON register: +// +// VLDRS - Only writes S, partial D update. +// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops. +// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops. +// +// FCONSTD can be used as a dependency-breaking instruction. + + +unsigned ARMBaseInstrInfo:: +getPartialRegUpdateClearance(const MachineInstr *MI, + unsigned OpNum, + const TargetRegisterInfo *TRI) const { + // Only Swift has partial register update problems. + if (!SwiftPartialUpdateClearance || !Subtarget.isSwift()) + return 0; + + assert(TRI && "Need TRI instance"); + + const MachineOperand &MO = MI->getOperand(OpNum); + if (MO.readsReg()) + return 0; + unsigned Reg = MO.getReg(); + int UseOp = -1; + + switch(MI->getOpcode()) { + // Normal instructions writing only an S-register. + case ARM::VLDRS: + case ARM::FCONSTS: + case ARM::VMOVSR: + // rdar://problem/8791586 + case ARM::VMOVv8i8: + case ARM::VMOVv4i16: + case ARM::VMOVv2i32: + case ARM::VMOVv2f32: + case ARM::VMOVv1i64: + UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI); + break; + + // Explicitly reads the dependency. + case ARM::VLD1LNd32: + UseOp = 1; + break; + default: + return 0; + } + + // If this instruction actually reads a value from Reg, there is no unwanted + // dependency. + if (UseOp != -1 && MI->getOperand(UseOp).readsReg()) + return 0; + + // We must be able to clobber the whole D-reg. + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + // Virtual register must be a foo:ssub_0<def,undef> operand. + if (!MO.getSubReg() || MI->readsVirtualRegister(Reg)) + return 0; + } else if (ARM::SPRRegClass.contains(Reg)) { + // Physical register: MI must define the full D-reg. + unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0, + &ARM::DPRRegClass); + if (!DReg || !MI->definesRegister(DReg, TRI)) + return 0; } + // MI has an unwanted D-register dependency. + // Avoid defs in the previous N instructions. + return SwiftPartialUpdateClearance; +} + +// Break a partial register dependency after getPartialRegUpdateClearance +// returned non-zero.
+void ARMBaseInstrInfo:: +breakPartialRegDependency(MachineBasicBlock::iterator MI, + unsigned OpNum, + const TargetRegisterInfo *TRI) const { + assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def"); + assert(TRI && "Need TRI instance"); + + const MachineOperand &MO = MI->getOperand(OpNum); + unsigned Reg = MO.getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "Can't break virtual register dependencies."); + unsigned DReg = Reg; + + // If MI defines an S-reg, find the corresponding D super-register. + if (ARM::SPRRegClass.contains(Reg)) { + DReg = ARM::D0 + (Reg - ARM::S0) / 2; + assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken"); + } + + assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps"); + assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg"); + + // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines + // the full D-register by loading the same value to both lanes. The + // instruction is micro-coded with 2 uops, so don't do this until we can + // properly schedule micro-coded instructions. The dispatcher stalls cause + // too big regressions. + + // Insert the dependency-breaking FCONSTD before MI. + // 96 is the encoding of 0.5, but the actual value doesn't matter here. + AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + get(ARM::FCONSTD), DReg).addImm(96)); + MI->addRegisterKilled(DReg, TRI, true); } bool ARMBaseInstrInfo::hasNOP() const { diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 92e5ee8..6f38e35 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -182,10 +182,13 @@ public: virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, const BranchProbability - &Probability) const { + &Probability) const { return NumCycles == 1; } + virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, + MachineBasicBlock &FMBB) const; + /// analyzeCompare - For a comparison instruction, return the source registers /// in SrcReg and SrcReg2 if having two register operands, and the value it /// compares against in CmpValue. Return true if the comparison instruction @@ -226,15 +229,18 @@ public: SDNode *DefNode, unsigned DefIdx, SDNode *UseNode, unsigned UseIdx) const; - virtual unsigned getOutputLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *DepMI) const; - /// VFP/NEON execution domains. std::pair<uint16_t, uint16_t> getExecutionDomain(const MachineInstr *MI) const; void setExecutionDomain(MachineInstr *MI, unsigned Domain) const; + unsigned getPartialRegUpdateClearance(const MachineInstr*, unsigned, + const TargetRegisterInfo*) const; + void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned, + const TargetRegisterInfo *TRI) const; + /// Get the number of addresses by LDM or VLDM or zero for unknown. + unsigned getNumLDMAddresses(const MachineInstr *MI) const; + private: unsigned getInstBundleLength(const MachineInstr *MI) const; diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 9deb96e..e5b300f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -84,6 +84,11 @@ ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const { ?
CSR_iOS_RegMask : CSR_AAPCS_RegMask; } +const uint32_t* +ARMBaseRegisterInfo::getNoPreservedMask() const { + return CSR_NoRegs_RegMask; +} + BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); @@ -106,148 +111,12 @@ getReservedRegs(const MachineFunction &MF) const { for (unsigned i = 0; i != 16; ++i) Reserved.set(ARM::D16 + i); } - return Reserved; -} - -bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF, - unsigned Reg) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - switch (Reg) { - default: break; - case ARM::SP: - case ARM::PC: - return true; - case ARM::R6: - if (hasBasePointer(MF)) - return true; - break; - case ARM::R7: - case ARM::R11: - if (FramePtr == Reg && TFI->hasFP(MF)) - return true; - break; - case ARM::R9: - return STI.isR9Reserved(); - } - - return false; -} + const TargetRegisterClass *RC = &ARM::GPRPairRegClass; + for(TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I!=E; ++I) + for (MCSubRegIterator SI(*I, this); SI.isValid(); ++SI) + if (Reserved.test(*SI)) Reserved.set(*I); -bool -ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC, - SmallVectorImpl<unsigned> &SubIndices, - unsigned &NewSubIdx) const { - - unsigned Size = RC->getSize() * 8; - if (Size < 6) - return 0; - - NewSubIdx = 0; // Whole register. - unsigned NumRegs = SubIndices.size(); - if (NumRegs == 8) { - // 8 D registers -> 1 QQQQ register. - return (Size == 512 && - SubIndices[0] == ARM::dsub_0 && - SubIndices[1] == ARM::dsub_1 && - SubIndices[2] == ARM::dsub_2 && - SubIndices[3] == ARM::dsub_3 && - SubIndices[4] == ARM::dsub_4 && - SubIndices[5] == ARM::dsub_5 && - SubIndices[6] == ARM::dsub_6 && - SubIndices[7] == ARM::dsub_7); - } else if (NumRegs == 4) { - if (SubIndices[0] == ARM::qsub_0) { - // 4 Q registers -> 1 QQQQ register. - return (Size == 512 && - SubIndices[1] == ARM::qsub_1 && - SubIndices[2] == ARM::qsub_2 && - SubIndices[3] == ARM::qsub_3); - } else if (SubIndices[0] == ARM::dsub_0) { - // 4 D registers -> 1 QQ register. - if (Size >= 256 && - SubIndices[1] == ARM::dsub_1 && - SubIndices[2] == ARM::dsub_2 && - SubIndices[3] == ARM::dsub_3) { - if (Size == 512) - NewSubIdx = ARM::qqsub_0; - return true; - } - } else if (SubIndices[0] == ARM::dsub_4) { - // 4 D registers -> 1 QQ register (2nd). - if (Size == 512 && - SubIndices[1] == ARM::dsub_5 && - SubIndices[2] == ARM::dsub_6 && - SubIndices[3] == ARM::dsub_7) { - NewSubIdx = ARM::qqsub_1; - return true; - } - } else if (SubIndices[0] == ARM::ssub_0) { - // 4 S registers -> 1 Q register. - if (Size >= 128 && - SubIndices[1] == ARM::ssub_1 && - SubIndices[2] == ARM::ssub_2 && - SubIndices[3] == ARM::ssub_3) { - if (Size >= 256) - NewSubIdx = ARM::qsub_0; - return true; - } - } - } else if (NumRegs == 2) { - if (SubIndices[0] == ARM::qsub_0) { - // 2 Q registers -> 1 QQ register. - if (Size >= 256 && SubIndices[1] == ARM::qsub_1) { - if (Size == 512) - NewSubIdx = ARM::qqsub_0; - return true; - } - } else if (SubIndices[0] == ARM::qsub_2) { - // 2 Q registers -> 1 QQ register (2nd). - if (Size == 512 && SubIndices[1] == ARM::qsub_3) { - NewSubIdx = ARM::qqsub_1; - return true; - } - } else if (SubIndices[0] == ARM::dsub_0) { - // 2 D registers -> 1 Q register. 
- if (Size >= 128 && SubIndices[1] == ARM::dsub_1) { - if (Size >= 256) - NewSubIdx = ARM::qsub_0; - return true; - } - } else if (SubIndices[0] == ARM::dsub_2) { - // 2 D registers -> 1 Q register (2nd). - if (Size >= 256 && SubIndices[1] == ARM::dsub_3) { - NewSubIdx = ARM::qsub_1; - return true; - } - } else if (SubIndices[0] == ARM::dsub_4) { - // 2 D registers -> 1 Q register (3rd). - if (Size == 512 && SubIndices[1] == ARM::dsub_5) { - NewSubIdx = ARM::qsub_2; - return true; - } - } else if (SubIndices[0] == ARM::dsub_6) { - // 2 D registers -> 1 Q register (3rd). - if (Size == 512 && SubIndices[1] == ARM::dsub_7) { - NewSubIdx = ARM::qsub_3; - return true; - } - } else if (SubIndices[0] == ARM::ssub_0) { - // 2 S registers -> 1 D register. - if (SubIndices[1] == ARM::ssub_1) { - if (Size >= 128) - NewSubIdx = ARM::dsub_0; - return true; - } - } else if (SubIndices[0] == ARM::ssub_2) { - // 2 S registers -> 1 D register (2nd). - if (Size >= 128 && SubIndices[1] == ARM::ssub_3) { - NewSubIdx = ARM::dsub_1; - return true; - } - } - } - return false; + return Reserved; } const TargetRegisterClass* @@ -263,6 +132,7 @@ ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) case ARM::QPRRegClassID: case ARM::QQPRRegClassID: case ARM::QQQQPRRegClassID: + case ARM::GPRPairRegClassID: return Super; } Super = *I++; @@ -476,7 +346,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, bool ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { // CortexA9 has a Write-after-write hazard for NEON registers. - if (!STI.isCortexA9()) + if (!STI.isLikeA9()) return false; switch (RC->getID()) { @@ -561,8 +431,9 @@ needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); - bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || - F->hasFnAttr(Attribute::StackAlignment)); + bool requiresRealignment = + ((MFI->getMaxAlignment() > StackAlign) || + F->getFnAttributes().hasAttribute(Attributes::StackAlignment)); return requiresRealignment && canRealignStack(MF); } @@ -595,6 +466,7 @@ unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const { unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const { + const MachineRegisterInfo &MRI = MF.getRegInfo(); switch (Reg) { default: break; // Return 0 if either register of the pair is a special register. @@ -603,10 +475,10 @@ unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, case ARM::R3: return ARM::R2; case ARM::R5: return ARM::R4; case ARM::R7: - return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6)) + return (MRI.isReserved(ARM::R7) || MRI.isReserved(ARM::R6)) ? 0 : ARM::R6; - case ARM::R9: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8; - case ARM::R11: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10; + case ARM::R9: return MRI.isReserved(ARM::R9) ? 0 :ARM::R8; + case ARM::R11: return MRI.isReserved(ARM::R11) ? 0 : ARM::R10; case ARM::S1: return ARM::S0; case ARM::S3: return ARM::S2; @@ -648,6 +520,7 @@ unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const { + const MachineRegisterInfo &MRI = MF.getRegInfo(); switch (Reg) { default: break; // Return 0 if either register of the pair is a special register. 
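The hunks on either side of this point apply one mechanical change: queries against the removed ARMBaseRegisterInfo::isReservedReg(), which re-derived frame-pointer and R9 policy on every call, become lookups in the reservation bitvector that getReservedRegs() computes once per function (and which, per the new loop above, now also marks a GPRPair reserved whenever either half is). A hedged sketch of the client-side idiom; the helper name is invented for illustration:

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    using namespace llvm;

    // Illustrative only. MachineRegisterInfo::isReserved() tests the cached
    // BitVector filled in by getReservedRegs(), so a single query covers
    // SP/PC, the frame pointer, R9 on targets that reserve it, and any
    // register pair with a reserved half.
    static bool isUsableScratch(const MachineFunction &MF, unsigned Reg) {
      return !MF.getRegInfo().isReserved(Reg);
    }

Keeping the policy in one BitVector means a pass consulting MachineRegisterInfo cannot drift out of sync with frame lowering the way scattered isReservedReg() calls could.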
@@ -656,10 +529,10 @@ unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg, case ARM::R2: return ARM::R3; case ARM::R4: return ARM::R5; case ARM::R6: - return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6)) + return (MRI.isReserved(ARM::R7) || MRI.isReserved(ARM::R6)) ? 0 : ARM::R7; - case ARM::R8: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9; - case ARM::R10: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11; + case ARM::R8: return MRI.isReserved(ARM::R9) ? 0 :ARM::R9; + case ARM::R10: return MRI.isReserved(ARM::R11) ? 0 : ARM::R11; case ARM::S0: return ARM::S1; case ARM::S2: return ARM::S3; diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index da29f7e..e2bdd04 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -96,19 +96,10 @@ public: /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID) const; + const uint32_t *getNoPreservedMask() const; BitVector getReservedRegs(const MachineFunction &MF) const; - /// canCombineSubRegIndices - Given a register class and a list of - /// subregister indices, return true if it's possible to combine the - /// subregister indices into one that corresponds to a larger - /// subregister. Return the new subregister index by reference. Note the - /// new index may be zero if the given subregisters can be combined to - /// form the whole register. - virtual bool canCombineSubRegIndices(const TargetRegisterClass *RC, - SmallVectorImpl<unsigned> &SubIndices, - unsigned &NewSubIdx) const; - const TargetRegisterClass* getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const; const TargetRegisterClass* @@ -170,8 +161,6 @@ public: unsigned MIFlags = MachineInstr::NoFlags)const; /// Code Generation virtual methods... - virtual bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; - virtual bool requiresRegisterScavenging(const MachineFunction &MF) const; virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td index bda1517..b378b96 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td @@ -190,6 +190,8 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[ // Callee-saved register lists. 
//===----------------------------------------------------------------------===// +def CSR_NoRegs : CalleeSavedRegs<(add)>; + def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, (sequence "D%u", 15, 8))>; diff --git a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp index e81b4cc..6adbf4f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp @@ -47,7 +47,7 @@ namespace { class ARMCodeEmitter : public MachineFunctionPass { ARMJITInfo *JTI; const ARMBaseInstrInfo *II; - const TargetData *TD; + const DataLayout *TD; const ARMSubtarget *Subtarget; TargetMachine &TM; JITCodeEmitter &MCE; @@ -67,7 +67,7 @@ namespace { ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) : MachineFunctionPass(ID), JTI(0), II((const ARMBaseInstrInfo *)tm.getInstrInfo()), - TD(tm.getTargetData()), TM(tm), + TD(tm.getDataLayout()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {} @@ -376,7 +376,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { "JIT relocation model must be set to static or default!"); JTI = ((ARMBaseTargetMachine &)MF.getTarget()).getJITInfo(); II = (const ARMBaseInstrInfo *)MF.getTarget().getInstrInfo(); - TD = MF.getTarget().getTargetData(); + TD = MF.getTarget().getDataLayout(); Subtarget = &TM.getSubtarget<ARMSubtarget>(); MCPEs = &MF.getConstantPool()->getConstants(); MJTEs = 0; @@ -389,7 +389,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { do { DEBUG(errs() << "JITTing function '" - << MF.getFunction()->getName() << "'\n"); + << MF.getName() << "'\n"); MCE.startFunction(MF); for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index a953985..a57368f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -22,7 +22,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -528,7 +528,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { // identity mapping of CPI's to CPE's. const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants(); - const TargetData &TD = *MF->getTarget().getTargetData(); + const DataLayout &TD = *MF->getTarget().getDataLayout(); for (unsigned i = 0, e = CPs.size(); i != e; ++i) { unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); assert(Size >= 4 && "Too small constant pool entry"); @@ -1388,10 +1388,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { // If the original WaterList entry was "new water" on this iteration, // propagate that to the new island. This is just keeping NewWaterList // updated to match the WaterList, which will be updated below. - if (NewWaterList.count(WaterBB)) { - NewWaterList.erase(WaterBB); + if (NewWaterList.erase(WaterBB)) NewWaterList.insert(NewIsland); - } + // The new CPE goes before the following block (NewMBB). 
NewMBB = llvm::next(MachineFunction::iterator(WaterBB)); diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h index 6b98d44..ae531c4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h @@ -102,8 +102,6 @@ public: virtual void print(raw_ostream &O) const; void print(raw_ostream *O) const { if (O) print(*O); } void dump() const; - - static bool classof(const ARMConstantPoolValue *) { return true; } }; inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) { @@ -158,7 +156,6 @@ public: static bool classof(const ARMConstantPoolValue *APV) { return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA(); } - static bool classof(const ARMConstantPoolConstant *) { return true; } }; /// ARMConstantPoolSymbol - ARM-specific constantpool values for external @@ -192,7 +189,6 @@ public: static bool classof(const ARMConstantPoolValue *ACPV) { return ACPV->isExtSymbol(); } - static bool classof(const ARMConstantPoolSymbol *) { return true; } }; /// ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic @@ -225,7 +221,6 @@ public: static bool classof(const ARMConstantPoolValue *ACPV) { return ACPV->isMachineBasicBlock(); } - static bool classof(const ARMConstantPoolMBB *) { return true; } }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp deleted file mode 100644 index f671317..0000000 --- a/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp +++ /dev/null @@ -1,78 +0,0 @@ -//===-- ARMELFWriterInfo.cpp - ELF Writer Info for the ARM backend --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF writer information for the ARM backend. 
-// -//===----------------------------------------------------------------------===// - -#include "ARMELFWriterInfo.h" -#include "ARMRelocations.h" -#include "llvm/Function.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Support/ELF.h" - -using namespace llvm; - -//===----------------------------------------------------------------------===// -// Implementation of the ARMELFWriterInfo class -//===----------------------------------------------------------------------===// - -ARMELFWriterInfo::ARMELFWriterInfo(TargetMachine &TM) - : TargetELFWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64, - TM.getTargetData()->isLittleEndian()) { -} - -ARMELFWriterInfo::~ARMELFWriterInfo() {} - -unsigned ARMELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { - switch (MachineRelTy) { - case ARM::reloc_arm_absolute: - case ARM::reloc_arm_relative: - case ARM::reloc_arm_cp_entry: - case ARM::reloc_arm_vfp_cp_entry: - case ARM::reloc_arm_machine_cp_entry: - case ARM::reloc_arm_jt_base: - case ARM::reloc_arm_pic_jt: - llvm_unreachable("unsupported ARM relocation type"); - - case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; - case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS; - case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC; - default: - llvm_unreachable("unknown ARM relocation type"); - } -} - -long int ARMELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, - long int Modifier) const { - llvm_unreachable("ARMELFWriterInfo::getDefaultAddendForRelTy() not " - "implemented"); -} - -unsigned ARMELFWriterInfo::getRelocationTySize(unsigned RelTy) const { - llvm_unreachable("ARMELFWriterInfo::getRelocationTySize() not implemented"); -} - -bool ARMELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { - llvm_unreachable("ARMELFWriterInfo::isPCRelativeRel() not implemented"); -} - -unsigned ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() const { - llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not " - "implemented"); -} - -long int ARMELFWriterInfo::computeRelocation(unsigned SymOffset, - unsigned RelOffset, - unsigned RelTy) const { - llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not " - "implemented"); -} diff --git a/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h deleted file mode 100644 index 6a84f8a..0000000 --- a/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h +++ /dev/null @@ -1,59 +0,0 @@ -//===-- ARMELFWriterInfo.h - ELF Writer Info for ARM ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF writer information for the ARM backend. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM_ELF_WRITER_INFO_H -#define ARM_ELF_WRITER_INFO_H - -#include "llvm/Target/TargetELFWriterInfo.h" - -namespace llvm { - class TargetMachine; - - class ARMELFWriterInfo : public TargetELFWriterInfo { - public: - ARMELFWriterInfo(TargetMachine &TM); - virtual ~ARMELFWriterInfo(); - - /// getRelocationType - Returns the target specific ELF Relocation type. 
- /// 'MachineRelTy' contains the object code independent relocation type - virtual unsigned getRelocationType(unsigned MachineRelTy) const; - - /// hasRelocationAddend - True if the target uses an addend in the - /// ELF relocation entry. - virtual bool hasRelocationAddend() const { return false; } - - /// getDefaultAddendForRelTy - Gets the default addend value for a - /// relocation entry based on the target ELF relocation type. - virtual long int getDefaultAddendForRelTy(unsigned RelTy, - long int Modifier = 0) const; - - /// getRelTySize - Returns the size of relocatable field in bits - virtual unsigned getRelocationTySize(unsigned RelTy) const; - - /// isPCRelativeRel - True if the relocation type is pc relative - virtual bool isPCRelativeRel(unsigned RelTy) const; - - /// getJumpTableRelocationTy - Returns the machine relocation type used - /// to reference a jumptable. - virtual unsigned getAbsoluteLabelMachineRelTy() const; - - /// computeRelocation - Some relocatable fields could be relocated - /// directly, avoiding the relocation symbol emission, compute the - /// final relocation value for this symbol. - virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset, - unsigned RelTy) const; - }; - -} // end llvm namespace - -#endif // ARM_ELF_WRITER_INFO_H diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 15bb32e..8c45e0b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -103,9 +103,9 @@ namespace { bool IsLoad; bool isUpdating; bool hasWritebackOperand; - NEONRegSpacing RegSpacing; - unsigned char NumRegs; // D registers loaded or stored - unsigned char RegElts; // elements per D register; used for lane ops + uint8_t RegSpacing; // One of type NEONRegSpacing + uint8_t NumRegs; // D registers loaded or stored + uint8_t RegElts; // elements per D register; used for lane ops // FIXME: Temporary flag to denote whether the real instruction takes // a single register (like the encoding) or all of the registers in // the list (like the asm syntax and the isel DAG). 
When all definitions @@ -377,7 +377,7 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed"); - NEONRegSpacing RegSpc = TableEntry->RegSpacing; + NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing; unsigned NumRegs = TableEntry->NumRegs; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), @@ -442,7 +442,7 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed"); - NEONRegSpacing RegSpc = TableEntry->RegSpacing; + NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing; unsigned NumRegs = TableEntry->NumRegs; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), @@ -493,7 +493,7 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); assert(TableEntry && "NEONLdStTable lookup failed"); - NEONRegSpacing RegSpc = TableEntry->RegSpacing; + NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing; unsigned NumRegs = TableEntry->NumRegs; unsigned RegElts = TableEntry->RegElts; @@ -777,9 +777,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } - case ARM::Int_eh_sjlj_dispatchsetup: - case ARM::Int_eh_sjlj_dispatchsetup_nofp: - case ARM::tInt_eh_sjlj_dispatchsetup: { + case ARM::Int_eh_sjlj_dispatchsetup: { MachineFunction &MF = *MI.getParent()->getParent(); const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index bf9d16e..6611862 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -40,7 +40,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" @@ -100,51 +100,53 @@ class ARMFastISel : public FastISel { } // Code from FastISel.cpp. 
- virtual unsigned FastEmitInst_(unsigned MachineInstOpcode, - const TargetRegisterClass *RC); - virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill); - virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill); - virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - unsigned Op2, bool Op2IsKill); - virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - uint64_t Imm); - virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - const ConstantFP *FPImm); - virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - uint64_t Imm); - virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - uint64_t Imm); - virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - uint64_t Imm1, uint64_t Imm2); - - virtual unsigned FastEmitInst_extractsubreg(MVT RetVT, - unsigned Op0, bool Op0IsKill, - uint32_t Idx); + private: + unsigned FastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass *RC); + unsigned FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill); + unsigned FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill); + unsigned FastEmitInst_rrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + unsigned Op2, bool Op2IsKill); + unsigned FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm); + unsigned FastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm); + unsigned FastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm); + unsigned FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm); + unsigned FastEmitInst_ii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm1, uint64_t Imm2); + + unsigned FastEmitInst_extractsubreg(MVT RetVT, + unsigned Op0, bool Op0IsKill, + uint32_t Idx); // Backend specific FastISel code. + private: virtual bool TargetSelectInstruction(const Instruction *I); virtual unsigned TargetMaterializeConstant(const Constant *C); virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, const LoadInst *LI); - + private: #include "ARMGenFastISel.inc" // Instruction selection routines. @@ -192,6 +194,7 @@ class ARMFastISel : public FastISel { unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg); unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg); unsigned ARMSelectCallOp(bool UseReg); + unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, EVT VT); // Call handling routines. 
private: @@ -615,11 +618,11 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { if (VT != MVT::i32) return 0; Reloc::Model RelocM = TM.getRelocationModel(); - - // TODO: Need more magic for ARM PIC. - if (!isThumb2 && (RelocM == Reloc::PIC_)) return 0; - - unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); + bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM); + const TargetRegisterClass *RC = isThumb2 ? + (const TargetRegisterClass*)&ARM::rGPRRegClass : + (const TargetRegisterClass*)&ARM::GPRRegClass; + unsigned DestReg = createResultReg(RC); // Use movw+movt when possible, it avoids constant pool entries. // Darwin targets don't support movt with Reloc::Static, see @@ -649,6 +652,9 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { Align = TD.getTypeAllocSize(GV->getType()); } + if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_) + return ARMLowerPICELF(GV, Align, VT); + // Grab index. unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8); @@ -666,17 +672,30 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { .addConstantPoolIndex(Idx); if (RelocM == Reloc::PIC_) MIB.addImm(Id); + AddOptionalDefs(MIB); } else { // The extra immediate is for addrmode2. MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), DestReg) .addConstantPoolIndex(Idx) .addImm(0); + AddOptionalDefs(MIB); + + if (RelocM == Reloc::PIC_) { + unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD; + unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); + + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(Opc), NewDestReg) + .addReg(DestReg) + .addImm(Id); + AddOptionalDefs(MIB); + return NewDestReg; + } } - AddOptionalDefs(MIB); } - if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) { + if (IsIndirect) { MachineInstrBuilder MIB; unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); if (isThumb2) @@ -1009,6 +1028,9 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, RC = &ARM::GPRRegClass; break; case MVT::i16: + if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem()) + return false; + if (isThumb2) { if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8; @@ -1021,6 +1043,9 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, RC = &ARM::GPRRegClass; break; case MVT::i32: + if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem()) + return false; + if (isThumb2) { if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) Opc = ARM::t2LDRi8; @@ -1127,6 +1152,9 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, } break; case MVT::i16: + if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem()) + return false; + if (isThumb2) { if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) StrOpc = ARM::t2STRHi8; @@ -1138,6 +1166,9 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, } break; case MVT::i32: + if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem()) + return false; + if (isThumb2) { if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) StrOpc = ARM::t2STRi8; @@ -1360,6 +1391,11 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) { unsigned Opc = isThumb2 ? 
ARM::tBRIND : ARM::BX; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) .addReg(AddrReg)); + + const IndirectBrInst *IB = cast<IndirectBrInst>(I); + for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i) + FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]); + return true; } @@ -2210,25 +2246,17 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)); - if (isThumb2) { - // Explicitly adding the predicate here. + // BL / BLX don't take a predicate, but tBL / tBLX do. + if (isThumb2) AddDefaultPred(MIB); - if (EnableARMLongCalls) - MIB.addReg(CalleeReg); - else - MIB.addExternalSymbol(TLI.getLibcallName(Call)); - } else { - if (EnableARMLongCalls) - MIB.addReg(CalleeReg); - else - MIB.addExternalSymbol(TLI.getLibcallName(Call)); + if (EnableARMLongCalls) + MIB.addReg(CalleeReg); + else + MIB.addExternalSymbol(TLI.getLibcallName(Call)); - // Explicitly adding the predicate here. - AddDefaultPred(MIB); - } // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) - MIB.addReg(RegArgs[i]); + MIB.addReg(RegArgs[i], RegState::Implicit); // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). @@ -2300,16 +2328,16 @@ bool ARMFastISel::SelectCall(const Instruction *I, ISD::ArgFlagsTy Flags; unsigned AttrInd = i - CS.arg_begin() + 1; - if (CS.paramHasAttr(AttrInd, Attribute::SExt)) + if (CS.paramHasAttr(AttrInd, Attributes::SExt)) Flags.setSExt(); - if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) + if (CS.paramHasAttr(AttrInd, Attributes::ZExt)) Flags.setZExt(); // FIXME: Only handle *easy* calls for now. - if (CS.paramHasAttr(AttrInd, Attribute::InReg) || - CS.paramHasAttr(AttrInd, Attribute::StructRet) || - CS.paramHasAttr(AttrInd, Attribute::Nest) || - CS.paramHasAttr(AttrInd, Attribute::ByVal)) + if (CS.paramHasAttr(AttrInd, Attributes::InReg) || + CS.paramHasAttr(AttrInd, Attributes::StructRet) || + CS.paramHasAttr(AttrInd, Attributes::Nest) || + CS.paramHasAttr(AttrInd, Attributes::ByVal)) return false; Type *ArgTy = (*i)->getType(); @@ -2356,30 +2384,20 @@ bool ARMFastISel::SelectCall(const Instruction *I, unsigned CallOpc = ARMSelectCallOp(UseReg); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)); - if(isThumb2) { - // Explicitly adding the predicate here. - AddDefaultPred(MIB); - if (UseReg) - MIB.addReg(CalleeReg); - else if (!IntrMemName) - MIB.addGlobalAddress(GV, 0, 0); - else - MIB.addExternalSymbol(IntrMemName, 0); - } else { - if (UseReg) - MIB.addReg(CalleeReg); - else if (!IntrMemName) - MIB.addGlobalAddress(GV, 0, 0); - else - MIB.addExternalSymbol(IntrMemName, 0); - // Explicitly adding the predicate here. + // ARM calls don't take a predicate, but tBL / tBLX do. + if(isThumb2) AddDefaultPred(MIB); - } + if (UseReg) + MIB.addReg(CalleeReg); + else if (!IntrMemName) + MIB.addGlobalAddress(GV, 0, 0); + else + MIB.addExternalSymbol(IntrMemName, 0); // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) - MIB.addReg(RegArgs[i]); + MIB.addReg(RegArgs[i], RegState::Implicit); // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). 
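Two details in the unified call-emission path above are worth spelling out: the predicate operand is added once, up front, because only the Thumb tBL/tBLX forms are predicable, and the outgoing argument registers are attached with RegState::Implicit so liveness analysis sees them as inputs of the call rather than as explicit operands the encoding would have to account for. A condensed, hedged sketch of the shape being built; the callee symbol and argument registers are placeholders, and the ARM backend header providing AddDefaultPred and the ARM:: enums is assumed:

    #include "ARMBaseInstrInfo.h" // AddDefaultPred; include path assumed
    #include "llvm/CallingConv.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstrBuilder.h"
    #include "llvm/Target/TargetInstrInfo.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    using namespace llvm;

    // Hedged sketch of a direct Thumb2 call; "memset", R0 and R1 are
    // stand-ins rather than values taken from the patch.
    static void emitSketchCall(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator InsertPt,
                               DebugLoc DL, const TargetInstrInfo &TII,
                               const TargetRegisterInfo &TRI) {
      MachineInstrBuilder MIB = BuildMI(MBB, InsertPt, DL, TII.get(ARM::tBL));
      AddDefaultPred(MIB);                     // tBL/tBLX take a predicate
      MIB.addExternalSymbol("memset", 0);      // direct callee
      MIB.addReg(ARM::R0, RegState::Implicit); // outgoing args: implicit uses
      MIB.addReg(ARM::R1, RegState::Implicit);
      // Call-clobber information comes from the preserved-register mask.
      MIB.addRegMask(TRI.getCallPreservedMask(CallingConv::C));
    }

The trailing register mask carries the clobber information, so individual callee-saved registers never need to be listed on the call instruction itself.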
@@ -2648,7 +2666,7 @@ bool ARMFastISel::SelectShift(const Instruction *I, unsigned Reg1 = getRegForValue(Src1Value); if (Reg1 == 0) return false; - unsigned Reg2; + unsigned Reg2 = 0; if (Opc == ARM::MOVsr) { Reg2 = getRegForValue(Src2Value); if (Reg2 == 0) return false; @@ -2790,6 +2808,47 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, return true; } +unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, + unsigned Align, EVT VT) { + bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); + ARMConstantPoolConstant *CPV = + ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); + unsigned Idx = MCP.getConstantPoolIndex(CPV, Align); + + unsigned Opc; + unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT)); + // Load value. + if (isThumb2) { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::t2LDRpci), DestReg1) + .addConstantPoolIndex(Idx)); + Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs; + } else { + // The extra immediate is for addrmode2. + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(ARM::LDRcp), DestReg1) + .addConstantPoolIndex(Idx).addImm(0)); + Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs; + } + + unsigned GlobalBaseReg = AFI->getGlobalBaseReg(); + if (GlobalBaseReg == 0) { + GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT)); + AFI->setGlobalBaseReg(GlobalBaseReg); + } + + unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT)); + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(Opc), DestReg2) + .addReg(DestReg1) + .addReg(GlobalBaseReg); + if (!UseGOTOFF) + MIB.addImm(0); + AddOptionalDefs(MIB); + + return DestReg2; +} + namespace llvm { FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index aee72d2..9392497 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -153,7 +153,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { int FramePtrSpillFI = 0; int D8SpillFI = 0; - // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue. + // All calls are tail calls in GHC calling conv, and functions have no + // prologue/epilogue. if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; @@ -360,7 +361,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); - // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue. + // All calls are tail calls in GHC calling conv, and functions have no + // prologue/epilogue. if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; @@ -1151,7 +1153,7 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { return; // Naked functions don't spill callee-saved registers. - if (MF.getFunction()->hasFnAttr(Attribute::Naked)) + if (MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked)) return; // We are planning to use NEON instructions vst1 / vld1. 
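The end of that hunk also shows the attribute-API migration that recurs through the remainder of this patch: Function::hasFnAttr(Attribute::X) becomes a query on the function's attribute set. A minimal sketch of the new spelling, reusing the Naked check from the patch; the wrapper function itself is illustrative:

    #include "llvm/Attributes.h"
    #include "llvm/Function.h"
    using namespace llvm;

    // Old style, removed by this patch: F.hasFnAttr(Attribute::Naked).
    // New style: ask the function-level attribute set directly.
    static bool isNakedFunction(const Function &F) {
      return F.getFnAttributes().hasAttribute(Attributes::Naked);
    }

The same spelling appears above for Attributes::StackAlignment and the parameter attributes, and below for Attributes::MinSize.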
@@ -1176,7 +1178,7 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned NumSpills = 0; for (; NumSpills < 8; ++NumSpills) - if (!MRI.isPhysRegOrOverlapUsed(ARM::D8 + NumSpills)) + if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills)) break; // Don't do this for just one d-register. It's not worth it. @@ -1209,6 +1211,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned FramePtr = RegInfo->getFrameRegister(MF); // Spill R4 if Thumb2 function requires stack realignment - it will be used as @@ -1218,12 +1221,12 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // FIXME: It will be better just to find spare register here. if (AFI->isThumb2Function() && (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))) - MF.getRegInfo().setPhysRegUsed(ARM::R4); + MRI.setPhysRegUsed(ARM::R4); if (AFI->isThumb1OnlyFunction()) { // Spill LR if Thumb1 function uses variable length argument lists. if (AFI->getVarArgsRegSaveSize() > 0) - MF.getRegInfo().setPhysRegUsed(ARM::LR); + MRI.setPhysRegUsed(ARM::LR); // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know // for sure what the stack size will be, but for this, an estimate is good @@ -1233,7 +1236,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // FIXME: It will be better just to find spare register here. unsigned StackSize = estimateStackSize(MF); if (MFI->hasVarSizedObjects() || StackSize > 508) - MF.getRegInfo().setPhysRegUsed(ARM::R4); + MRI.setPhysRegUsed(ARM::R4); } // See if we can spill vector registers to aligned stack. @@ -1241,7 +1244,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Spill the BasePtr if it's used. if (RegInfo->hasBasePointer(MF)) - MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister()); + MRI.setPhysRegUsed(RegInfo->getBaseRegister()); // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. @@ -1249,7 +1252,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; - if (MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) { + if (MRI.isPhysRegUsed(Reg)) { Spilled = true; CanEliminateFrame = false; } @@ -1338,7 +1341,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. // Spill LR as well so we can fold BX_RET to the registers restore (LDM). 
if (!LRSpilled && CS1Spilled) { - MF.getRegInfo().setPhysRegUsed(ARM::LR); + MRI.setPhysRegUsed(ARM::LR); NumGPRSpills++; UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), (unsigned)ARM::LR)); @@ -1347,7 +1350,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (hasFP(MF)) { - MF.getRegInfo().setPhysRegUsed(FramePtr); + MRI.setPhysRegUsed(FramePtr); NumGPRSpills++; } @@ -1362,16 +1365,16 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Don't spill high register if the function is thumb1 if (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) || Reg == ARM::LR) { - MF.getRegInfo().setPhysRegUsed(Reg); - if (!RegInfo->isReservedReg(MF, Reg)) + MRI.setPhysRegUsed(Reg); + if (!MRI.isReserved(Reg)) ExtraCSSpill = true; break; } } } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) { unsigned Reg = UnspilledCS2GPRs.front(); - MF.getRegInfo().setPhysRegUsed(Reg); - if (!RegInfo->isReservedReg(MF, Reg)) + MRI.setPhysRegUsed(Reg); + if (!MRI.isReserved(Reg)) ExtraCSSpill = true; } } @@ -1389,7 +1392,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, while (NumExtras && !UnspilledCS1GPRs.empty()) { unsigned Reg = UnspilledCS1GPRs.back(); UnspilledCS1GPRs.pop_back(); - if (!RegInfo->isReservedReg(MF, Reg) && + if (!MRI.isReserved(Reg) && (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) || Reg == ARM::LR)) { Extras.push_back(Reg); @@ -1401,7 +1404,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, while (NumExtras && !UnspilledCS2GPRs.empty()) { unsigned Reg = UnspilledCS2GPRs.back(); UnspilledCS2GPRs.pop_back(); - if (!RegInfo->isReservedReg(MF, Reg)) { + if (!MRI.isReserved(Reg)) { Extras.push_back(Reg); NumExtras--; } @@ -1409,7 +1412,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (Extras.size() && NumExtras == 0) { for (unsigned i = 0, e = Extras.size(); i != e; ++i) { - MF.getRegInfo().setPhysRegUsed(Extras[i]); + MRI.setPhysRegUsed(Extras[i]); } } else if (!AFI->isThumb1OnlyFunction()) { // note: Thumb1 functions spill to R12, not the stack. Reserve a slot @@ -1423,7 +1426,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (ForceLRSpill) { - MF.getRegInfo().setPhysRegUsed(ARM::LR); + MRI.setPhysRegUsed(ARM::LR); AFI->setLRIsSpilledForFarJump(true); } } diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp index a5fd15b..1240169 100644 --- a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -47,7 +47,7 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { // Skip over one non-VFP / NEON instruction. if (!LastMI->isBarrier() && // On A9, AGU and NEON/FPU are muxed. - !(STI.isCortexA9() && (LastMI->mayLoad() || LastMI->mayStore())) && + !(STI.isLikeA9() && (LastMI->mayLoad() || LastMI->mayStore())) && (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) { MachineBasicBlock::iterator I = LastMI; if (I != LastMI->getParent()->begin()) { diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index a3a6c31..efd6d2b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -239,7 +239,6 @@ private: /// SelectCMOVOp - Select CMOV instructions for ARM. 
SDNode *SelectCMOVOp(SDNode *N); - SDNode *SelectConditionalOp(SDNode *N); SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); @@ -306,7 +305,7 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { } /// \brief Check whether a particular node is a constant value representable as -/// (N * Scale) where (N in [\arg RangeMin, \arg RangeMax). +/// (N * Scale) where (N in [\p RangeMin, \p RangeMax). /// /// \param ScaledConstant [out] - On success, the pre-scaled constant value. static bool isScaledConstantInRange(SDValue Node, int Scale, @@ -337,7 +336,8 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { if (!CheckVMLxHazard) return true; - if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9()) + if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9() && + !Subtarget->isSwift()) return true; if (!N->hasOneUse()) @@ -375,12 +375,13 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt) { - if (!Subtarget->isCortexA9()) + if (!Subtarget->isLikeA9() && !Subtarget->isSwift()) return true; if (Shift.hasOneUse()) return true; // R << 2 is free. - return ShOpcVal == ARM_AM::lsl && ShAmt == 2; + return ShOpcVal == ARM_AM::lsl && + (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); } bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, @@ -487,7 +488,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { if (N.getOpcode() == ISD::MUL && - (!Subtarget->isCortexA9() || N.hasOneUse())) { + ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { // X * [3,5,9] -> X + X * [2,4,8] etc. int RHSC = (int)RHS->getZExtValue(); @@ -551,7 +552,8 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, // Try matching (R shl C) + (R). if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && - !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) { + !(Subtarget->isLikeA9() || Subtarget->isSwift() || + N.getOperand(0).hasOneUse())) { ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't @@ -585,7 +587,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, SDValue &Offset, SDValue &Opc) { if (N.getOpcode() == ISD::MUL && - (!Subtarget->isCortexA9() || N.hasOneUse())) { + (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { // X * [3,5,9] -> X + X * [2,4,8] etc. int RHSC = (int)RHS->getZExtValue(); @@ -651,7 +653,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, } } - if (Subtarget->isCortexA9() && !N.hasOneUse()) { + if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) { // Compute R +/- (R << N) and reuse it. Base = N; Offset = CurDAG->getRegister(0, MVT::i32); @@ -689,7 +691,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, // Try matching (R shl C) + (R). 
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && - !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) { + !(Subtarget->isLikeA9() || Subtarget->isSwift() || + N.getOperand(0).hasOneUse())) { ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't @@ -2363,121 +2366,6 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); } -SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) { - SDValue FalseVal = N->getOperand(0); - SDValue TrueVal = N->getOperand(1); - ARMCC::CondCodes CCVal = - (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); - SDValue CCR = N->getOperand(3); - assert(CCR.getOpcode() == ISD::Register); - SDValue InFlag = N->getOperand(4); - SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - - if (Subtarget->isThumb()) { - SDValue CPTmp0; - SDValue CPTmp1; - if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::t2ANDCCrs; break; - case ARMISD::COR: Opc = ARM::t2ORRCCrs; break; - case ARMISD::CXOR: Opc = ARM::t2EORCCrs; break; - } - SDValue Ops[] = { - FalseVal, FalseVal, CPTmp0, CPTmp1, CC, CCR, Reg0, InFlag - }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8); - } - - ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); - if (T) { - unsigned TrueImm = T->getZExtValue(); - if (is_t2_so_imm(TrueImm)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::t2ANDCCri; break; - case ARMISD::COR: Opc = ARM::t2ORRCCri; break; - case ARMISD::CXOR: Opc = ARM::t2EORCCri; break; - } - SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); - SDValue Ops[] = { FalseVal, FalseVal, True, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7); - } - } - - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::t2ANDCCrr; break; - case ARMISD::COR: Opc = ARM::t2ORRCCrr; break; - case ARMISD::CXOR: Opc = ARM::t2EORCCrr; break; - } - SDValue Ops[] = { FalseVal, FalseVal, TrueVal, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7); - } - - SDValue CPTmp0; - SDValue CPTmp1; - SDValue CPTmp2; - if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::ANDCCrsi; break; - case ARMISD::COR: Opc = ARM::ORRCCrsi; break; - case ARMISD::CXOR: Opc = ARM::EORCCrsi; break; - } - SDValue Ops[] = { - FalseVal, FalseVal, CPTmp0, CPTmp2, CC, CCR, Reg0, InFlag - }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8); - } - - if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::ANDCCrsr; break; - case ARMISD::COR: Opc = ARM::ORRCCrsr; break; - case ARMISD::CXOR: Opc = ARM::EORCCrsr; break; - } - SDValue Ops[] = { - FalseVal, FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, Reg0, InFlag - }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 9); - } - - ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); - if (T) { - unsigned TrueImm = T->getZExtValue(); - if 
(is_so_imm(TrueImm)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::ANDCCri; break; - case ARMISD::COR: Opc = ARM::ORRCCri; break; - case ARMISD::CXOR: Opc = ARM::EORCCri; break; - } - SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); - SDValue Ops[] = { FalseVal, FalseVal, True, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7); - } - } - - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::ANDCCrr; break; - case ARMISD::COR: Opc = ARM::ORRCCrr; break; - case ARMISD::CXOR: Opc = ARM::EORCCrr; break; - } - SDValue Ops[] = { FalseVal, FalseVal, TrueVal, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7); -} - /// Target-specific DAG combining for ISD::XOR. /// Target-independent combining lowers SELECT_CC nodes of the form /// select_cc setg[ge] X, 0, X, -X @@ -2753,6 +2641,38 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { dl, MVT::i32, MVT::i32, Ops, 5); } } + case ARMISD::UMLAL:{ + if (Subtarget->isThumb()) { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32)}; + return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops, 6); + }else{ + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? + ARM::UMLAL : ARM::UMLALv5, + dl, MVT::i32, MVT::i32, Ops, 7); + } + } + case ARMISD::SMLAL:{ + if (Subtarget->isThumb()) { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32)}; + return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops, 6); + }else{ + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? 
+ ARM::SMLAL : ARM::SMLALv5, + dl, MVT::i32, MVT::i32, Ops, 7); + } + } case ISD::LOAD: { SDNode *ResNode = 0; if (Subtarget->isThumb() && Subtarget->hasThumb2()) @@ -2805,10 +2725,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::CMOV: return SelectCMOVOp(N); - case ARMISD::CAND: - case ARMISD::COR: - case ARMISD::CXOR: - return SelectConditionalOp(N); case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index 190ca07..ff99b04 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -122,6 +122,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); setOperationAction(ISD::SELECT, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); + setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); if (VT.isInteger()) { setOperationAction(ISD::SHL, VT, Custom); @@ -514,6 +515,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); setOperationAction(ISD::FEXP, MVT::v4f32, Expand); setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); + setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand); // Neon does not support some operations on v1i64 and v2i64 types. setOperationAction(ISD::MUL, MVT::v1i64, Expand); @@ -566,6 +568,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } } + // ARM and Thumb2 support UMLAL/SMLAL. + if (!Subtarget->isThumb1Only()) + setTargetDAGCombine(ISD::ADDC); + + computeRegisterProperties(); // ARM does not have f32 extending load. @@ -629,9 +636,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); - // These are expanded into libcalls. - if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) { - // v7M has a hardware divider + if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) && + !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) { + // These are expanded into libcalls if the cpu doesn't have HW divider. setOperationAction(ISD::SDIV, MVT::i32, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); } @@ -791,12 +798,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::MUL); - - if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) { - setTargetDAGCombine(ISD::AND); - setTargetDAGCombine(ISD::OR); - setTargetDAGCombine(ISD::XOR); - } + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::XOR); if (Subtarget->hasV6Ops()) setTargetDAGCombine(ISD::SRL); @@ -821,7 +825,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) benefitFromCodePlacementOpt = true; // Prefer likely predicted branches to selects on out-of-order cores. - predictableSelectIsExpensive = Subtarget->isCortexA9(); + predictableSelectIsExpensive = Subtarget->isLikeA9(); setMinFunctionAlignment(Subtarget->isThumb() ? 
1 : 2); } @@ -898,9 +902,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; case ARMISD::CMOV: return "ARMISD::CMOV"; - case ARMISD::CAND: return "ARMISD::CAND"; - case ARMISD::COR: return "ARMISD::COR"; - case ARMISD::CXOR: return "ARMISD::CXOR"; case ARMISD::RBIT: return "ARMISD::RBIT"; @@ -984,6 +985,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VTBL2: return "ARMISD::VTBL2"; case ARMISD::VMULLs: return "ARMISD::VMULLs"; case ARMISD::VMULLu: return "ARMISD::VMULLu"; + case ARMISD::UMLAL: return "ARMISD::UMLAL"; + case ARMISD::SMLAL: return "ARMISD::SMLAL"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::FMAX: return "ARMISD::FMAX"; case ARMISD::FMIN: return "ARMISD::FMIN"; @@ -1591,19 +1594,19 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // FIXME: handle tail calls differently. unsigned CallOpc; + bool HasMinSizeAttr = MF.getFunction()->getFnAttributes(). + hasAttribute(Attributes::MinSize); if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; - else if (doesNotRet && isDirect && !isARMFunc && - Subtarget->hasRAS() && !Subtarget->isThumb1Only()) - // "mov lr, pc; b _foo" to avoid confusing the RSP - CallOpc = ARMISD::CALL_NOLINK; else CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; } else { - if (!isDirect && !Subtarget->hasV5TOps()) { + if (!isDirect && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; - } else if (doesNotRet && isDirect && Subtarget->hasRAS()) + else if (doesNotRet && isDirect && Subtarget->hasRAS() && + // Emit regular call when code size is the priority + !HasMinSizeAttr) // "mov lr, pc; b _foo" to avoid confusing the RSP CallOpc = ARMISD::CALL_NOLINK; else @@ -1653,22 +1656,31 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, /// and then confiscate the rest of the parameter registers to insure /// this. void -ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { +ARMTargetLowering::HandleByVal( + CCState *State, unsigned &size, unsigned Align) const { unsigned reg = State->AllocateReg(GPRArgRegs, 4); assert((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && "unhandled ParmContext"); if ((!State->isFirstByValRegValid()) && (ARM::R0 <= reg) && (reg <= ARM::R3)) { - State->setFirstByValReg(reg); - // At a call site, a byval parameter that is split between - // registers and memory needs its size truncated here. In a - // function prologue, such byval parameters are reassembled in - // memory, and are not truncated. - if (State->getCallOrPrologue() == Call) { - unsigned excess = 4 * (ARM::R4 - reg); - assert(size >= excess && "expected larger existing stack allocation"); - size -= excess; + if (Subtarget->isAAPCS_ABI() && Align > 4) { + unsigned AlignInRegs = Align / 4; + unsigned Waste = (ARM::R4 - reg) % AlignInRegs; + for (unsigned i = 0; i < Waste; ++i) + reg = State->AllocateReg(GPRArgRegs, 4); + } + if (reg != 0) { + State->setFirstByValReg(reg); + // At a call site, a byval parameter that is split between + // registers and memory needs its size truncated here. In a + // function prologue, such byval parameters are reassembled in + // memory, and are not truncated. 
+ if (State->getCallOrPrologue() == Call) { + unsigned excess = 4 * (ARM::R4 - reg); + assert(size >= excess && "expected larger existing stack allocation"); + size -= excess; + } } } // Confiscate any remaining parameter registers to preclude their @@ -1801,6 +1813,14 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } } + // If Caller's vararg or byval argument has been split between registers and + // stack, do not perform a tail call, since part of the argument is in the + // caller's local frame. + const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction(). + getInfo<ARMFunctionInfo>(); + if (AFI_Caller->getVarArgsRegSaveSize()) + return false; + // If the callee takes no arguments then go on to check the results of the // call. if (!Outs.empty()) { @@ -2532,7 +2552,10 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, DebugLoc dl, SDValue &Chain, - unsigned ArgOffset) const { + const Value *OrigArg, + unsigned OffsetFromOrigArg, + unsigned ArgOffset, + bool ForceMutable) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -2559,7 +2582,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, getPointerTy()); SmallVector<SDValue, 4> MemOps; - for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) { + for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) { const TargetRegisterClass *RC; if (AFI->isThumb1OnlyFunction()) RC = &ARM::tGPRRegClass; @@ -2570,7 +2593,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()), + MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i), false, false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, @@ -2581,7 +2604,8 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, &MemOps[0], MemOps.size()); } else // This will point to the next argument passed via stack. - AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true)); + AFI->setVarArgsFrameIndex( + MFI->CreateFixedObject(4, ArgOffset, !ForceMutable)); } SDValue @@ -2604,14 +2628,16 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv, /* Return*/ false, isVarArg)); - + SmallVector<SDValue, 16> ArgValues; int lastInsIndex = -1; - SDValue ArgValue; + Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); + unsigned CurArgIdx = 0; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - + std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx); + CurArgIdx = Ins[VA.getValNo()].OrigArgIndex; // Arguments stored in registers. if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); @@ -2705,14 +2731,20 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Since they could be overwritten by lowering of arguments in case of // a tail call. if (Flags.isByVal()) { - unsigned VARegSize, VARegSaveSize; - computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); - VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0); - unsigned Bytes = Flags.getByValSize() - VARegSize; - if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
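The AAPCS padding arithmetic added to HandleByVal above is easy to misread, so here is a minimal standalone sketch of the same bookkeeping (plain C++; the splitByVal helper and the 0..3 register numbering are made up for illustration, with 4 standing in for ARM::R4; this is not the LLVM API):

#include <cassert>
#include <cstdio>

struct ByValSplit {
  int FirstReg;        // first GPR holding part of the byval (0..3), -1 if none
  unsigned StackBytes; // bytes that still live on the stack at a call site
};

ByValSplit splitByVal(int NextFreeReg, unsigned Size, unsigned Align,
                      bool IsAAPCS) {
  // AAPCS: a byval with alignment > 4 must start in a suitably aligned
  // register, so registers may be wasted, as in the Waste loop above.
  if (IsAAPCS && Align > 4) {
    unsigned AlignInRegs = Align / 4;
    unsigned Waste = (4 - NextFreeReg) % AlignInRegs;
    NextFreeReg += Waste;
  }
  if (NextFreeReg >= 4)
    return {-1, Size}; // no registers left: everything stays in memory
  // At a call site, 4 * (R4 - reg) bytes travel in registers and are
  // carved off the stack allocation ("size -= excess" above).
  unsigned InRegs = 4 * (4 - NextFreeReg);
  unsigned OnStack = Size > InRegs ? Size - InRegs : 0;
  return {NextFreeReg, OnStack};
}

int main() {
  // A 16-byte byval with 8-byte alignment, with r0 already taken by another
  // argument: r1 is wasted, the byval starts in r2, 8 bytes go to the stack.
  ByValSplit S = splitByVal(1, 16, 8, true);
  assert(S.FirstReg == 2 && S.StackBytes == 8);
  std::printf("first reg r%d, %u bytes on stack\n", S.FirstReg, S.StackBytes);
  return 0;
}

With Align = 8 and r0 occupied, the sketch skips r1 and starts the byval in r2, which is exactly the Waste = (ARM::R4 - reg) % AlignInRegs step in the diff.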
- int FI = MFI->CreateFixedObject(Bytes, - VA.getLocMemOffset(), false); - InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + if (!AFI->getVarArgsFrameIndex()) { + VarArgStyleRegisters(CCInfo, DAG, + dl, Chain, CurOrigArg, + Ins[VA.getValNo()].PartOffset, + VA.getLocMemOffset(), + true /*force mutable frames*/); + int VAFrameIndex = AFI->getVarArgsFrameIndex(); + InVals.push_back(DAG.getFrameIndex(VAFrameIndex, getPointerTy())); + } else { + int FI = MFI->CreateFixedObject(Flags.getByValSize(), + VA.getLocMemOffset(), false); + InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); + } } else { int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, VA.getLocMemOffset(), true); @@ -2730,7 +2762,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // varargs if (isVarArg) - VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset()); + VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0, 0, + CCInfo.getNextStackOffset()); return Chain; } @@ -3890,6 +3923,36 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, return SDValue(); } +// Check if a VEXT instruction can handle the shuffle mask when the +// vector sources of the shuffle are the same. +static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) { + unsigned NumElts = VT.getVectorNumElements(); + + // Assume that the first shuffle index is not UNDEF. Fail if it is. + if (M[0] < 0) + return false; + + Imm = M[0]; + + // If this is a VEXT shuffle, the immediate value is the index of the first + // element. The other shuffle indices must be the successive elements after + // the first one. + unsigned ExpectedElt = Imm; + for (unsigned i = 1; i < NumElts; ++i) { + // Increment the expected index. If it wraps around, just follow it + // back to index zero and keep going. + ++ExpectedElt; + if (ExpectedElt == NumElts) + ExpectedElt = 0; + + if (M[i] < 0) continue; // ignore UNDEF indices + if (ExpectedElt != static_cast<unsigned>(M[i])) + return false; + } + + return true; +} + static bool isVEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseVEXT, unsigned &Imm) { @@ -4157,10 +4220,21 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, } // Scan through the operands to see if only one value is used. + // + // As an optimisation, even if more than one value is used it may be more + // profitable to splat with one value and then change some lanes. + // + // Heuristically we decide to do this if the vector has a "dominant" value, + // defined as splatted to more than half of the lanes. unsigned NumElts = VT.getVectorNumElements(); bool isOnlyLowElement = true; bool usesOnlyOneValue = true; + bool hasDominantValue = false; bool isConstant = true; + + // Map of the number of times a particular SDValue appears in the + // element list. + DenseMap<SDValue, unsigned> ValueCounts; SDValue Value; for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); @@ -4171,13 +4245,21 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) isConstant = false; - if (!Value.getNode()) + ValueCounts.insert(std::make_pair(V, 0)); + unsigned &Count = ValueCounts[V]; + + // Is this value dominant?
(takes up more than half of the lanes) + if (++Count > (NumElts / 2)) { + hasDominantValue = true; Value = V; - else if (V != Value) - usesOnlyOneValue = false; + } } + if (ValueCounts.size() != 1) + usesOnlyOneValue = false; + if (!Value.getNode() && ValueCounts.size() > 0) + Value = ValueCounts.begin()->first; - if (!Value.getNode()) + if (ValueCounts.size() == 0) return DAG.getUNDEF(VT); if (isOnlyLowElement) @@ -4187,9 +4269,51 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // Use VDUP for non-constant splats. For f32 constant splats, reduce to // i32 and try again. - if (usesOnlyOneValue && EltSize <= 32) { - if (!isConstant) - return DAG.getNode(ARMISD::VDUP, dl, VT, Value); + if (hasDominantValue && EltSize <= 32) { + if (!isConstant) { + SDValue N; + + // If we are VDUPing a value that comes directly from a vector, that will + // cause an unnecessary move to and from a GPR, where instead we could + // just use VDUPLANE. + if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + // We need to create a new undef vector to use for the VDUPLANE if the + // size of the vector from which we get the value is different than the + // size of the vector that we need to create. We will insert the element + // such that the register coalescer will remove unnecessary copies. + if (VT != Value->getOperand(0).getValueType()) { + ConstantSDNode *constIndex; + constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)); + assert(constIndex && "The index is not a constant!"); + unsigned index = constIndex->getAPIntValue().getLimitedValue() % + VT.getVectorNumElements(); + N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, + DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT), + Value, DAG.getConstant(index, MVT::i32)), + DAG.getConstant(index, MVT::i32)); + } else { + N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, + Value->getOperand(0), Value->getOperand(1)); + } + } + else + N = DAG.getNode(ARMISD::VDUP, dl, VT, Value); + + if (!usesOnlyOneValue) { + // The dominant value was splatted as 'N', but we now have to insert + // all differing elements. + for (unsigned I = 0; I < NumElts; ++I) { + if (Op.getOperand(I) == Value) + continue; + SmallVector<SDValue, 3> Ops; + Ops.push_back(N); + Ops.push_back(Op.getOperand(I)); + Ops.push_back(DAG.getConstant(I, MVT::i32)); + N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3); + } + } + return N; + } if (VT.getVectorElementType().isFloatingPoint()) { SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i < NumElts; ++i) @@ -4201,9 +4325,11 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (Val.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, Val); } - SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); - if (Val.getNode()) - return DAG.getNode(ARMISD::VDUP, dl, VT, Val); + if (usesOnlyOneValue) { + SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); + if (isConstant && Val.getNode()) + return DAG.getNode(ARMISD::VDUP, dl, VT, Val); + } } // If all elements are constants and the case above didn't get hit, fall back @@ -4586,6 +4712,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (isVREVMask(ShuffleMask, VT, 16)) return DAG.getNode(ARMISD::VREV16, dl, VT, V1); + if (V2->getOpcode() == ISD::UNDEF && + isSingletonVEXTMask(ShuffleMask, VT, Imm)) { + return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1, + DAG.getConstant(Imm, MVT::i32)); + } + // Check for Neon shuffles that modify both input vectors in place. 
// If both results are used, i.e., if there are two shuffles with the same // source operands and with masks corresponding to both results of one of @@ -5421,7 +5553,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, exitMBB->transferSuccessorsAndUpdatePHIs(BB); const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : + (const TargetRegisterClass*)&ARM::rGPRRegClass : (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned scratch = MRI.createVirtualRegister(TRC); unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); @@ -5532,7 +5664,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, exitMBB->transferSuccessorsAndUpdatePHIs(BB); const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : + (const TargetRegisterClass*)&ARM::rGPRRegClass : (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned scratch = MRI.createVirtualRegister(TRC); unsigned scratch2 = MRI.createVirtualRegister(TRC); @@ -5546,7 +5678,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, // ldrex dest, ptr // (sign extend dest, if required) // cmp dest, incr - // cmov.cond scratch2, dest, incr + // cmov.cond scratch2, incr, dest // strex scratch, scratch2, ptr // cmp scratch, #0 // bne- loopMBB @@ -5569,7 +5701,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) .addReg(oldval).addReg(incr)); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) - .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR); + .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR); MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); if (strOpc == ARM::t2STREX) @@ -5939,12 +6071,15 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4); - if (AFI->isThumb1OnlyFunction()) - BuildMI(DispatchBB, dl, TII->get(ARM::tInt_eh_sjlj_dispatchsetup)); - else if (!Subtarget->hasVFP2()) - BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup_nofp)); - else - BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); + MachineInstrBuilder MIB; + MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); + + const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); + const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); + + // Add a register mask with no preserved registers. This results in all + // registers being marked as clobbered. + MIB.addRegMask(RI.getNoPreservedMask()); unsigned NumLPads = LPadList.size(); if (Subtarget->isThumb2()) { @@ -6016,9 +6151,9 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. - unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty); + unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); if (Align == 0) - Align = getTargetData()->getTypeAllocSize(C->getType()); + Align = getDataLayout()->getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); unsigned VReg1 = MRI->createVirtualRegister(TRC); @@ -6105,9 +6240,9 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. 
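One note on the atomic min/max hunk above: the operand swap in the MOVCC is what makes the expansion actually compute a minimum or maximum. A portable sketch of the same loop shape (plain C++ with std::atomic standing in for the ldrex/strex loop; this is an illustration, not backend code):

#include <atomic>
#include <cassert>

int atomic_min(std::atomic<int> &V, int Incr) {
  int Old = V.load();
  int New;
  do {
    // cmp old, incr; with the fixed order the movcc keeps 'Incr' by default
    // and overwrites it with the old value when old < incr, i.e. the minimum.
    New = (Old < Incr) ? Old : Incr;
  } while (!V.compare_exchange_weak(Old, New)); // strex retry, like "bne- loopMBB"
  return Old; // as in the expansion, the old value is the result
}

int main() {
  std::atomic<int> V(10);
  assert(atomic_min(V, 3) == 10 && V.load() == 3);
  assert(atomic_min(V, 7) == 3 && V.load() == 3);
  return 0;
}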
- unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty); + unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); if (Align == 0) - Align = getTargetData()->getTypeAllocSize(C->getType()); + Align = getDataLayout()->getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); unsigned VReg1 = MRI->createVirtualRegister(TRC); @@ -6154,18 +6289,15 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { } // Add the jump table entries as successors to the MBB. - MachineBasicBlock *PrevMBB = 0; + SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs; for (std::vector<MachineBasicBlock*>::iterator I = LPadList.begin(), E = LPadList.end(); I != E; ++I) { MachineBasicBlock *CurMBB = *I; - if (PrevMBB != CurMBB) + if (SeenMBBs.insert(CurMBB)) DispContBB->addSuccessor(CurMBB); - PrevMBB = CurMBB; } // N.B. the order the invoke BBs are processed in doesn't matter here. - const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); - const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF); SmallVector<MachineBasicBlock*, 64> MBBLPads; for (SmallPtrSet<MachineBasicBlock*, 64>::iterator @@ -6279,7 +6411,8 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { UnitSize = 2; } else { // Check whether we can use NEON instructions. - if (!MF->getFunction()->hasFnAttr(Attribute::NoImplicitFloat) && + if (!MF->getFunction()->getFnAttributes(). + hasAttribute(Attributes::NoImplicitFloat) && Subtarget->hasNEON()) { if ((Align % 16 == 0) && SizeVal >= 16) { ldrOpc = ARM::VLD1q32wb_fixed; @@ -6364,7 +6497,8 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { } else { AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ldrOpc),scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); + .addReg(srcOut, RegState::Define).addReg(srcIn) + .addReg(0).addImm(1)); AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) .addReg(scratch).addReg(destIn) @@ -6427,9 +6561,9 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { const Constant *C = ConstantInt::get(Int32Ty, LoopSize); // MachineConstantPool wants an explicit alignment. - unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty); + unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); if (Align == 0) - Align = getTargetData()->getTypeAllocSize(C->getType()); + Align = getDataLayout()->getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp)) @@ -6981,73 +7115,131 @@ static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) { return AllOnes ? C->isAllOnesValue() : C->isNullValue(); } +// Return true if N is conditionally 0 or all ones. +// Detects these expressions where cc is an i1 value: +// +// (select cc 0, y) [AllOnes=0] +// (select cc y, 0) [AllOnes=0] +// (zext cc) [AllOnes=0] +// (sext cc) [AllOnes=0/1] +// (select cc -1, y) [AllOnes=1] +// (select cc y, -1) [AllOnes=1] +// +// Invert is set when N is the null/all-ones constant while CC is false. +// OtherOp is set to the alternative value of N.
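Before the implementation, the algebra that these select/zext/sext foldings rely on can be sanity-checked in a few lines of plain C++ (a standalone sketch, with sel standing in for ISD::SELECT; exhaustive over a few small values):

#include <cassert>
#include <cstdint>

static int32_t sel(bool cc, int32_t t, int32_t f) { return cc ? t : f; }

int main() {
  for (int cc = 0; cc <= 1; ++cc)
    for (int32_t x = -2; x <= 2; ++x)
      for (int32_t c = -2; c <= 2; ++c) {
        // (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
        assert(sel(cc, 0, c) + x == sel(cc, x, x + c));
        // (and (select cc, -1, c), x) -> (select cc, x, (and x, c))  [AllOnes]
        assert((sel(cc, -1, c) & x) == sel(cc, x, x & c));
        // (add (zext cc), x) -> (select cc, (add x, 1), x)
        assert(cc + x == sel(cc, x + 1, x));
        // (add (sext cc), x) -> (select cc, (add x, -1), x)
        assert(-cc + x == sel(cc, x - 1, x));
      }
  return 0;
}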
+static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, + SDValue &CC, bool &Invert, + SDValue &OtherOp, + SelectionDAG &DAG) { + switch (N->getOpcode()) { + default: return false; + case ISD::SELECT: { + CC = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + if (isZeroOrAllOnes(N1, AllOnes)) { + Invert = false; + OtherOp = N2; + return true; + } + if (isZeroOrAllOnes(N2, AllOnes)) { + Invert = true; + OtherOp = N1; + return true; + } + return false; + } + case ISD::ZERO_EXTEND: + // (zext cc) can never be the all ones value. + if (AllOnes) + return false; + // Fall through. + case ISD::SIGN_EXTEND: { + EVT VT = N->getValueType(0); + CC = N->getOperand(0); + if (CC.getValueType() != MVT::i1) + return false; + Invert = !AllOnes; + if (AllOnes) + // When looking for an AllOnes constant, N is an sext, and the 'other' + // value is 0. + OtherOp = DAG.getConstant(0, VT); + else if (N->getOpcode() == ISD::ZERO_EXTEND) + // When looking for a 0 constant, N can be zext or sext. + OtherOp = DAG.getConstant(1, VT); + else + OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); + return true; + } + } +} + // Combine a constant select operand into its use: // -// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) -// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) +// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) +// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) +// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1] +// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) +// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) // // The transform is rejected if the select doesn't have a constant operand that -// is null. +// is null, or all ones when AllOnes is set. +// +// Also recognize sext/zext from i1: +// +// (add (zext cc), x) -> (select cc (add x, 1), x) +// (add (sext cc), x) -> (select cc (add x, -1), x) +// +// These transformations eventually create predicated instructions. // // @param N The node to transform. // @param Slct The N operand that is a select. // @param OtherOp The other N operand (x above). // @param DCI Context. +// @param AllOnes Require the select constant to be all ones instead of null. // @returns The new node, or SDValue() on failure. static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + bool AllOnes = false) { SelectionDAG &DAG = DCI.DAG; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = N->getValueType(0); - unsigned Opc = N->getOpcode(); - bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; - SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); - SDValue RHS = isSlctCC ? 
Slct.getOperand(3) : Slct.getOperand(2); - ISD::CondCode CC = ISD::SETCC_INVALID; - - if (isSlctCC) { - CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get(); - } else { - SDValue CCOp = Slct.getOperand(0); - if (CCOp.getOpcode() == ISD::SETCC) - CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get(); - } - - bool DoXform = false; - bool InvCC = false; - assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) && - "Bad input!"); + SDValue NonConstantVal; + SDValue CCOp; + bool SwapSelectOps; + if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps, + NonConstantVal, DAG)) + return SDValue(); - if (isZeroOrAllOnes(LHS, false)) { - DoXform = true; - } else if (CC != ISD::SETCC_INVALID && isZeroOrAllOnes(RHS, false)) { - std::swap(LHS, RHS); - SDValue Op0 = Slct.getOperand(0); - EVT OpVT = isSlctCC ? Op0.getValueType() : Op0.getOperand(0).getValueType(); - bool isInt = OpVT.isInteger(); - CC = ISD::getSetCCInverse(CC, isInt); + // Slct is now known to be the desired identity constant when CC is true. + SDValue TrueVal = OtherOp; + SDValue FalseVal = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, + OtherOp, NonConstantVal); + // Unless SwapSelectOps says CC should be false. + if (SwapSelectOps) + std::swap(TrueVal, FalseVal); - if (!TLI.isCondCodeLegal(CC, OpVT)) - return SDValue(); // Inverse operator isn't legal. + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, + CCOp, TrueVal, FalseVal); +} - DoXform = true; - InvCC = true; +// Attempt combineSelectAndUse on each operand of a commutative operator N. +static +SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, + TargetLowering::DAGCombinerInfo &DCI) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (N0.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes); + if (Result.getNode()) + return Result; } - - if (!DoXform) - return SDValue(); - - SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS); - if (isSlctCC) - return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result, - Slct.getOperand(0), Slct.getOperand(1), CC); - SDValue CCOp = Slct.getOperand(0); - if (InvCC) - CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(), - CCOp.getOperand(0), CCOp.getOperand(1), CC); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, - CCOp, OtherOp, Result); + if (N1.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes); + if (Result.getNode()) + return Result; + } + return SDValue(); } // AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction @@ -7139,6 +7331,154 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp); } +static SDValue findMUL_LOHI(SDValue V) { + if (V->getOpcode() == ISD::UMUL_LOHI || + V->getOpcode() == ISD::SMUL_LOHI) + return V; + return SDValue(); + } + +static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + + if (Subtarget->isThumb1Only()) return SDValue(); + + // Only perform the checks after legalize when the pattern is available. + if (DCI.isBeforeLegalize()) return SDValue(); + + // Look for multiply add opportunities. + // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where + // each add node consumes a value from ISD::UMUL_LOHI and there is + // a glue link from the first add to the second add.
+ // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by + // an S/UMLAL instruction. + // loAdd UMUL_LOHI + // \ / :lo \ :hi + // \ / \ [no multiline comment] + // ADDC | hiAdd + // \ :glue / / + // \ / / + // ADDE + // + assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC"); + SDValue AddcOp0 = AddcNode->getOperand(0); + SDValue AddcOp1 = AddcNode->getOperand(1); + + // Check if the two operands are from the same mul_lohi node. + if (AddcOp0.getNode() == AddcOp1.getNode()) + return SDValue(); + + assert(AddcNode->getNumValues() == 2 && + AddcNode->getValueType(0) == MVT::i32 && + AddcNode->getValueType(1) == MVT::Glue && + "Expect ADDC with two result values: i32, glue"); + + // Check that the ADDC adds the low result of the S/UMUL_LOHI. + if (AddcOp0->getOpcode() != ISD::UMUL_LOHI && + AddcOp0->getOpcode() != ISD::SMUL_LOHI && + AddcOp1->getOpcode() != ISD::UMUL_LOHI && + AddcOp1->getOpcode() != ISD::SMUL_LOHI) + return SDValue(); + + // Look for the glued ADDE. + SDNode* AddeNode = AddcNode->getGluedUser(); + if (AddeNode == NULL) + return SDValue(); + + // Make sure it is really an ADDE. + if (AddeNode->getOpcode() != ISD::ADDE) + return SDValue(); + + assert(AddeNode->getNumOperands() == 3 && + AddeNode->getOperand(2).getValueType() == MVT::Glue && + "ADDE node has the wrong inputs"); + + // Check for the triangle shape. + SDValue AddeOp0 = AddeNode->getOperand(0); + SDValue AddeOp1 = AddeNode->getOperand(1); + + // Make sure that the ADDE operands are not coming from the same node. + if (AddeOp0.getNode() == AddeOp1.getNode()) + return SDValue(); + + // Find the MUL_LOHI node walking up ADDE's operands. + bool IsLeftOperandMUL = false; + SDValue MULOp = findMUL_LOHI(AddeOp0); + if (MULOp == SDValue()) + MULOp = findMUL_LOHI(AddeOp1); + else + IsLeftOperandMUL = true; + if (MULOp == SDValue()) + return SDValue(); + + // Figure out the right opcode. + unsigned Opc = MULOp->getOpcode(); + unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL; + + // Figure out the high and low input values to the MLAL node. + SDValue* HiMul = &MULOp; + SDValue* HiAdd = NULL; + SDValue* LoMul = NULL; + SDValue* LowAdd = NULL; + + if (IsLeftOperandMUL) + HiAdd = &AddeOp1; + else + HiAdd = &AddeOp0; + + + if (AddcOp0->getOpcode() == Opc) { + LoMul = &AddcOp0; + LowAdd = &AddcOp1; + } + if (AddcOp1->getOpcode() == Opc) { + LoMul = &AddcOp1; + LowAdd = &AddcOp0; + } + + if (LoMul == NULL) + return SDValue(); + + if (LoMul->getNode() != HiMul->getNode()) + return SDValue(); + + // Create the merged node. + SelectionDAG &DAG = DCI.DAG; + + // Build operand list. + SmallVector<SDValue, 8> Ops; + Ops.push_back(LoMul->getOperand(0)); + Ops.push_back(LoMul->getOperand(1)); + Ops.push_back(*LowAdd); + Ops.push_back(*HiAdd); + + SDValue MLALNode = DAG.getNode(FinalOpc, AddcNode->getDebugLoc(), + DAG.getVTList(MVT::i32, MVT::i32), + &Ops[0], Ops.size()); + + // Replace the ADD nodes' uses with the MLAL node's values. + SDValue HiMLALResult(MLALNode.getNode(), 1); + DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult); + + SDValue LoMLALResult(MLALNode.getNode(), 0); + DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult); + + // Return original node to notify the driver to stop replacing. + SDValue resNode(AddcNode, 0); + return resNode; +} + +/// PerformADDCCombine - Target-specific dag combine transform from +/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL.
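The replacement is sound because splitting a 64-bit accumulate into ADDC on the low halves and ADDE on the high halves is exactly what UMLAL computes. A standalone check of that carry identity (plain C++, no LLVM types):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Rn = 0xDEADBEEF, Rm = 0xCAFEBABE;
  uint32_t AccLo = 0x11111111, AccHi = 0x22222222;

  // UMUL_LOHI: one 32x32->64 multiply, observed as a lo and a hi half.
  uint64_t Prod = (uint64_t)Rn * Rm;
  uint32_t MulLo = (uint32_t)Prod, MulHi = (uint32_t)(Prod >> 32);

  // ADDC on the low halves produces a carry; ADDE folds it into the highs.
  uint64_t LoSum = (uint64_t)MulLo + AccLo;
  uint32_t Lo = (uint32_t)LoSum;
  uint32_t Carry = (uint32_t)(LoSum >> 32);
  uint32_t Hi = MulHi + AccHi + Carry;

  // A single UMLAL (RdHi:RdLo = Rn * Rm + RdHi:RdLo) gives the same result.
  uint64_t Umlal = Prod + (((uint64_t)AccHi << 32) | AccLo);
  assert((((uint64_t)Hi << 32) | Lo) == Umlal);
  return 0;
}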
+static SDValue PerformADDCCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + + return AddCombineTo64bitMLAL(N, DCI, Subtarget); + +} + /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with /// operands N0 and N1. This is a helper for PerformADDCombine that is /// called with the default operands, and if that fails, with commuted @@ -7153,7 +7493,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, return Result; // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) - if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { + if (N0.getNode()->hasOneUse()) { SDValue Result = combineSelectAndUse(N, N0, N1, DCI); if (Result.getNode()) return Result; } @@ -7185,7 +7525,7 @@ static SDValue PerformSUBCombine(SDNode *N, SDValue N1 = N->getOperand(1); // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) - if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { + if (N1.getNode()->hasOneUse()) { SDValue Result = combineSelectAndUse(N, N1, N0, DCI); if (Result.getNode()) return Result; } @@ -7313,41 +7653,6 @@ static SDValue PerformMULCombine(SDNode *N, return SDValue(); } -static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) { - return N.getOpcode() == ARMISD::CMOV && N.getNode()->hasOneUse() && - isZeroOrAllOnes(N.getOperand(0), AllOnes); -} - -/// formConditionalOp - Combine an operation with a conditional move operand -/// to form a conditional op. e.g. (or x, (cmov 0, y, cond)) => (or.cond x, y) -/// (and x, (cmov -1, y, cond)) => (and.cond, x, y) -static SDValue formConditionalOp(SDNode *N, SelectionDAG &DAG, - bool Commutable) { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - - bool isAND = N->getOpcode() == ISD::AND; - bool isCand = isCMOVWithZeroOrAllOnesLHS(N1, isAND); - if (!isCand && Commutable) { - isCand = isCMOVWithZeroOrAllOnesLHS(N0, isAND); - if (isCand) - std::swap(N0, N1); - } - if (!isCand) - return SDValue(); - - unsigned Opc = 0; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ISD::AND: Opc = ARMISD::CAND; break; - case ISD::OR: Opc = ARMISD::COR; break; - case ISD::XOR: Opc = ARMISD::CXOR; break; - } - return DAG.getNode(Opc, N->getDebugLoc(), N->getValueType(0), N0, - N1.getOperand(1), N1.getOperand(2), N1.getOperand(3), - N1.getOperand(4)); -} - static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { @@ -7382,10 +7687,10 @@ static SDValue PerformANDCombine(SDNode *N, } if (!Subtarget->isThumb1Only()) { - // (and x, (cmov -1, y, cond)) => (and.cond x, y) - SDValue CAND = formConditionalOp(N, DAG, true); - if (CAND.getNode()) - return CAND; + // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) + SDValue Result = combineSelectAndUseCommutative(N, true, DCI); + if (Result.getNode()) + return Result; } return SDValue(); @@ -7425,13 +7730,12 @@ static SDValue PerformORCombine(SDNode *N, } if (!Subtarget->isThumb1Only()) { - // (or x, (cmov 0, y, cond)) => (or.cond x, y) - SDValue COR = formConditionalOp(N, DAG, true); - if (COR.getNode()) - return COR; + // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) + SDValue Result = combineSelectAndUseCommutative(N, false, DCI); + if (Result.getNode()) + return Result; } - // The code below optimizes (or (and X, Y), Z). // The AND operand needs to have a single user to make these optimizations // profitable. 
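The bitfield code referred to by the comment above lies outside this hunk, but the identity it relies on, that an OR of two AND-masked values with a contiguous mask is a single bitfield insert, is easy to check standalone (plain C++; the bfi helper below is illustrative only, not an LLVM or ACLE API):

#include <cassert>
#include <cstdint>

// Hypothetical helper mirroring BFI semantics: insert the low 'Width' bits
// of Val into Base at bit position Lsb.
static uint32_t bfi(uint32_t Base, uint32_t Val, unsigned Lsb, unsigned Width) {
  uint32_t Mask = ((Width < 32 ? (1u << Width) : 0u) - 1u) << Lsb;
  return (Base & ~Mask) | ((Val << Lsb) & Mask);
}

int main() {
  uint32_t X = 0x12345678, Y = 0xABCDEF01;
  unsigned Lsb = 8, Width = 12;
  uint32_t Mask = ((1u << Width) - 1u) << Lsb;
  // (or (and Y, ~Mask), (and X, Mask)) is a single insert of X's bitfield
  // into Y when Mask is contiguous.
  assert(((Y & ~Mask) | (X & Mask)) == bfi(Y, X >> Lsb, Lsb, Width));
  return 0;
}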
@@ -7593,10 +7897,10 @@ static SDValue PerformXORCombine, return SDValue(); if (!Subtarget->isThumb1Only()) { - // (xor x, (cmov 0, y, cond)) => (xor.cond x, y) - SDValue CXOR = formConditionalOp(N, DAG, true); - if (CXOR.getNode()) - return CXOR; + // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) + SDValue Result = combineSelectAndUseCommutative(N, false, DCI); + if (Result.getNode()) + return Result; } return SDValue(); @@ -8746,6 +9050,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; + case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget); case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); @@ -8807,8 +9112,8 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, } bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { - if (!Subtarget->allowsUnalignedMem()) - return false; + // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs. + bool AllowsUnaligned = Subtarget->allowsUnalignedMem(); switch (VT.getSimpleVT().SimpleTy) { default: @@ -8816,10 +9121,14 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { case MVT::i8: case MVT::i16: case MVT::i32: - return true; + // Unaligned access can use (for example) LDRB, LDRH, LDR + return AllowsUnaligned; case MVT::f64: - return Subtarget->hasNEON(); - // FIXME: VLD1 etc with standard alignment is legal. + case MVT::v2f64: + // For any little-endian targets with NEON, we can support unaligned ld/st + // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8. + // A big-endian target may also explicitly support unaligned accesses. + return Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian()); } } @@ -8838,7 +9147,7 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, // See if we can use NEON instructions for this... if (IsZeroVal && - !F->hasFnAttr(Attribute::NoImplicitFloat) && + !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat) && Subtarget->hasNEON()) { if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) { return MVT::v4i32; } @@ -9632,7 +9941,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::arm_neon_vld4lane: { Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. - uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8; + uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; @@ -9657,7 +9966,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; - NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8; + NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8; } Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 51d1205..4eb3b2c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -63,9 +63,6 @@ namespace llvm { FMSTAT, // ARM fmstat instruction. CMOV, // ARM conditional move instructions. - CAND, // ARM conditional and instructions.
- COR, // ARM conditional or instructions. - CXOR, // ARM conditional xor instructions. BCC_i64, @@ -176,6 +173,9 @@ namespace llvm { VMULLs, // ...signed VMULLu, // ...unsigned + UMLAL, // 64bit Unsigned Accumulate Multiply + SMLAL, // 64bit Signed Accumulate Multiply + // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other // operations, but for ARM some BUILD_VECTORs are legal as-is and their @@ -260,6 +260,11 @@ namespace llvm { virtual const char *getTargetNodeName(unsigned Opcode) const; + virtual bool isSelectSupported(SelectSupportKind Kind) const { + // ARM does not support scalar condition selects on vectors. + return (Kind != ScalarCondVectorVal); + } + /// getSetCCResultType - Return the value type to use for ISD::SETCC. virtual EVT getSetCCResultType(EVT VT) const; @@ -461,7 +466,11 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals) const; void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, - DebugLoc dl, SDValue &Chain, unsigned ArgOffset) + DebugLoc dl, SDValue &Chain, + const Value *OrigArg, + unsigned OffsetFromOrigArg, + unsigned ArgOffset, + bool ForceMutable = false) const; void computeRegArea(CCState &CCInfo, MachineFunction &MF, @@ -472,7 +481,7 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals) const; /// HandleByVal - Target-specific cleanup for ByVal support. - virtual void HandleByVal(CCState *, unsigned &) const; + virtual void HandleByVal(CCState *, unsigned &, unsigned) const; /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td index c8966fb..67a6820 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -846,6 +846,23 @@ class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops, let Inst{3-0} = Rm; } +// Division instructions. +class ADivA1I<bits<3> opcod, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin, + opc, asm, "", pattern> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{27-23} = 0b01110; + let Inst{22-20} = opcod; + let Inst{19-16} = Rd; + let Inst{15-12} = 0b1111; + let Inst{11-8} = Rm; + let Inst{7-4} = 0b0001; + let Inst{3-0} = Rn; +} + // PKH instructions def PKHLSLAsmOperand : ImmAsmOperand { let Name = "PKHLSLImm"; @@ -893,6 +910,10 @@ class ARMV5TPat<dag pattern, dag result> : Pat<pattern, result> { class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [IsARM, HasV5TE]; } +// ARMV5MOPat - Same as ARMV5TEPat with UseMulOps. 
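As a cross-check of the ADivA1I layout defined in the hunk above, the field placement can be assembled by hand and compared against known sdiv/udiv encodings (plain C++; encodeDiv is a made-up helper for this check, and it assumes the cond field sits in bits 31-28 as in other ARM data-processing forms):

#include <cassert>
#include <cstdint>

// Bits 27-23 = 01110, 22-20 = opcod, 19-16 = Rd, 15-12 = 1111,
// 11-8 = Rm, 7-4 = 0001, 3-0 = Rn, exactly as in the class above.
static uint32_t encodeDiv(unsigned opcod, unsigned Rd, unsigned Rn,
                          unsigned Rm, unsigned cond = 0xE /* AL */) {
  return (cond << 28) | (0x0Eu << 23) | (opcod << 20) | (Rd << 16) |
         (0xFu << 12) | (Rm << 8) | (0x1u << 4) | Rn;
}

int main() {
  assert(encodeDiv(0x1, 0, 1, 2) == 0xE710F211); // sdiv r0, r1, r2
  assert(encodeDiv(0x3, 3, 4, 5) == 0xE733F514); // udiv r3, r4, r5
  return 0;
}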
+class ARMV5MOPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsARM, HasV5TE, UseMulOps]; +} class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [IsARM, HasV6]; } diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp index 31b0c41..a0b6f24 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp @@ -13,13 +13,17 @@ #include "ARMInstrInfo.h" #include "ARM.h" +#include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" +#include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" using namespace llvm; @@ -84,3 +88,61 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } + +namespace { + /// ARMCGBR - Create Global Base Reg pass. This initializes the PIC + /// global base register for ARM ELF. + struct ARMCGBR : public MachineFunctionPass { + static char ID; + ARMCGBR() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + if (AFI->getGlobalBaseReg() == 0) + return false; + + const ARMTargetMachine *TM = + static_cast<const ARMTargetMachine *>(&MF.getTarget()); + if (TM->getRelocationModel() != Reloc::PIC_) + return false; + + LLVMContext* Context = &MF.getFunction()->getContext(); + GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false, + GlobalValue::ExternalLinkage, 0, + "_GLOBAL_OFFSET_TABLE_"); + unsigned Id = AFI->createPICLabelUId(); + ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id); + unsigned Align = TM->getDataLayout()->getPrefTypeAlignment(GV->getType()); + unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align); + + MachineBasicBlock &FirstMBB = MF.front(); + MachineBasicBlock::iterator MBBI = FirstMBB.begin(); + DebugLoc DL = FirstMBB.findDebugLoc(MBBI); + unsigned GlobalBaseReg = AFI->getGlobalBaseReg(); + unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ? 
+ ARM::t2LDRpci : ARM::LDRcp; + const TargetInstrInfo &TII = *TM->getInstrInfo(); + MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL, + TII.get(Opc), GlobalBaseReg) + .addConstantPoolIndex(Idx); + if (Opc == ARM::LDRcp) + MIB.addImm(0); + AddDefaultPred(MIB); + + return true; + } + + virtual const char *getPassName() const { + return "ARM PIC Global Base Reg Initialization"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +char ARMCGBR::ID = 0; +FunctionPass* +llvm::createARMGlobalBaseRegPass() { return new ARMCGBR(); } diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index 992aba5..df2e55e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -83,6 +83,13 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, SDTCisInt<0>, SDTCisVT<1, i32>, SDTCisVT<4, i32>]>; + +def SDT_ARM64bitmlal : SDTypeProfile<2,4, [ SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>, + SDTCisVT<4, i32>, SDTCisVT<5, i32> ] >; +def ARMUmlal : SDNode<"ARMISD::UMLAL", SDT_ARM64bitmlal>; +def ARMSmlal : SDNode<"ARMISD::SMLAL", SDT_ARM64bitmlal>; + // Node definitions. def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>; @@ -90,9 +97,10 @@ def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart, - [SDNPHasChain, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, + SDNPOptInGlue, SDNPOutGlue]>; def ARMcopystructbyval : SDNode<"ARMISD::COPY_STRUCT_BYVAL" , SDT_ARMStructByVal, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, @@ -148,14 +156,16 @@ def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>; def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", - SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>; + SDT_ARMEH_SJLJ_Setjmp, + [SDNPHasChain, SDNPSideEffect]>; def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", - SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>; + SDT_ARMEH_SJLJ_Longjmp, + [SDNPHasChain, SDNPSideEffect]>; def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER, - [SDNPHasChain]>; + [SDNPHasChain, SDNPSideEffect]>; def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER, - [SDNPHasChain]>; + [SDNPHasChain, SDNPSideEffect]>; def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH, [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; @@ -197,6 +207,8 @@ def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float">; def HasDivide : Predicate<"Subtarget->hasDivide()">, AssemblerPredicate<"FeatureHWDiv", "divide">; +def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">, + AssemblerPredicate<"FeatureHWDivARM">; def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">, AssemblerPredicate<"FeatureT2XtPk", "pack/extract">; @@ -232,6 +244,7 @@ def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; def UseMovt : Predicate<"Subtarget->useMovt()">; def DontUseMovt : Predicate<"!Subtarget->useMovt()">; def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; +def UseMulOps : 
Predicate<"Subtarget->useMulOps()">; // Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available. // But only select them if more precision in FP computation is allowed. @@ -242,6 +255,20 @@ def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || " "Subtarget->isTargetDarwin()">; +// VGETLNi32 is microcoded on Swift - prefer VMOV. +def HasFastVGETLNi32 : Predicate<"!Subtarget->isSwift()">; +def HasSlowVGETLNi32 : Predicate<"Subtarget->isSwift()">; + +// VDUP.32 is microcoded on Swift - prefer VMOV. +def HasFastVDUP32 : Predicate<"!Subtarget->isSwift()">; +def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">; + +// Cortex-A9 prefers VMOVSR to VMOVDRR even when using NEON for scalar FP, as +// this allows more effective execution domain optimization. See +// setExecutionDomain(). +def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">; +def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">; + def IsLE : Predicate<"TLI.isLittleEndian()">; def IsBE : Predicate<"TLI.isBigEndian()">; @@ -256,15 +283,13 @@ class RegConstraint<string C> { // ARM specific transformation functions and pattern fragments. // -// imm_neg_XFORM - Return a imm value packed into the format described for -// imm_neg defs below. +// imm_neg_XFORM - Return the negation of an i32 immediate value. def imm_neg_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32); }]>; -// so_imm_not_XFORM - Return a so_imm value packed into the format described for -// so_imm_not def below. -def so_imm_not_XFORM : SDNodeXForm<imm, [{ +// imm_not_XFORM - Return the complement of an i32 immediate value. +def imm_not_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32); }]>; @@ -275,7 +300,7 @@ def imm16_31 : ImmLeaf<i32, [{ def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; } def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ - int64_t Value = -(int)N->getZExtValue(); + unsigned Value = -(unsigned)N->getZExtValue(); return Value && ARM_AM::getSOImmVal(Value) != -1; }], imm_neg_XFORM> { let ParserMatchClass = so_imm_neg_asmoperand; } @@ -287,7 +312,7 @@ def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; } def so_imm_not : Operand<i32>, PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1; - }], so_imm_not_XFORM> { + }], imm_not_XFORM> { let ParserMatchClass = so_imm_not_asmoperand; } @@ -1791,12 +1816,15 @@ def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label), let Inst{15-12} = Rd; let Inst{11-0} = label{11-0}; } + +let hasSideEffects = 1 in { def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p), 4, IIC_iALUi, []>; def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), 4, IIC_iALUi, []>; +} //===----------------------------------------------------------------------===// // Control Flow Instructions.
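A note on the so_imm_neg change above: negating in unsigned arithmetic presumably keeps the operation well-defined and wrapping at 32 bits, which is what the resulting i32 immediate needs; the old int64_t/-(int) form misbehaves for 0x80000000. A standalone check of that, plus the ~x == -x - 1 identity connecting imm_not_XFORM to imm_neg_XFORM (plain C++):

#include <cassert>
#include <cstdint>

int main() {
  // 32-bit wrapping negation: -0x80000000 == 0x80000000 (mod 2^32), so the
  // unsigned form stays inside i32 territory.
  uint32_t Imm = 0x80000000u;
  uint32_t Neg32 = -Imm;
  assert(Neg32 == 0x80000000u);

  // A 64-bit signed negation of the same value yields 2^31, which no longer
  // fits in a signed 32-bit immediate at all.
  int64_t Neg64 = -(int64_t)INT32_MIN;
  assert(Neg64 == 0x80000000LL && (uint32_t)Neg64 == Neg32);

  // The identity relating the two XFORMs above: ~x == -x - 1 (mod 2^32).
  for (uint32_t x = 0; x < 16; ++x)
    assert(~x == -x - 1u);
  return 0;
}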
@@ -3079,15 +3107,19 @@ def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm), (SUBSri GPR:$src, so_imm_neg:$imm)>; def : ARMPat<(add GPR:$src, imm0_65535_neg:$imm), - (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>; + (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>, + Requires<[IsARM, HasV6T2]>; def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm), - (SUBSrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>; + (SUBSrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>, + Requires<[IsARM, HasV6T2]>; // The with-carry-in form matches bitwise not instead of the negation. // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. def : ARMPat<(ARMadde GPR:$src, so_imm_not:$imm, CPSR), (SBCri GPR:$src, so_imm_not:$imm)>; +def : ARMPat<(ARMadde GPR:$src, imm0_65535_neg:$imm, CPSR), + (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>; // Note: These are implemented in C++ code, because they have to generate // ADD/SUBrs instructions, which use a complex pattern that a xform function @@ -3399,6 +3431,18 @@ class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin, let Inst{11-8} = Rm; let Inst{3-0} = Rn; } +class AsMla1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> { + bits<4> RdLo; + bits<4> RdHi; + bits<4> Rm; + bits<4> Rn; + let Inst{19-16} = RdHi; + let Inst{15-12} = RdLo; + let Inst{11-8} = Rm; + let Inst{3-0} = Rn; +} // FIXME: The v5 pseudos are only necessary for the additional Constraint // property. Remove them when it's possible to add those properties @@ -3419,13 +3463,13 @@ def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, 4, IIC_iMUL32, [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))], (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6, UseMulOps]>; } def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6, UseMulOps]> { bits<4> Ra; let Inst{15-12} = Ra; } @@ -3441,7 +3485,7 @@ def MLAv5: ARMPseudoExpand<(outs GPR:$Rd), def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>, - Requires<[IsARM, HasV6T2]> { + Requires<[IsARM, HasV6T2, UseMulOps]> { bits<4> Rd; bits<4> Rm; bits<4> Rn; @@ -3481,14 +3525,14 @@ def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), } // Multiply + accumulate -def SMLAL : AsMul1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64, +def SMLAL : AsMla1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]>; -def UMLAL : AsMul1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>; +def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]>; + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>; def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64, @@ -3504,17 +3548,22 @@ 
def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), let Inst{3-0} = Rn; } -let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { +let Constraints = "$RLo = $RdLo,$RHi = $RdHi" in { def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s), 4, IIC_iMAC64, [], - (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, + pred:$p, cc_out:$s)>, Requires<[IsARM, NoV6]>; def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s), 4, IIC_iMAC64, [], - (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, + pred:$p, cc_out:$s)>, Requires<[IsARM, NoV6]>; +} + +let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { def UMAALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, pred:$p), 4, IIC_iMAC64, [], @@ -3542,7 +3591,7 @@ def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6, UseMulOps]>; def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), @@ -3552,7 +3601,7 @@ def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd), def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6, UseMulOps]>; def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), @@ -3606,7 +3655,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16), (sext_inreg GPRnopc:$Rm, i16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3614,7 +3663,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16), (sra GPRnopc:$Rm, (i32 16)))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3622,7 +3671,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)), (sext_inreg GPRnopc:$Rm, i16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3630,7 +3679,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)), (sra GPRnopc:$Rm, (i32 16)))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3638,7 +3687,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (sra (opnode GPRnopc:$Rn, (sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def WT : AMulxyIa<0b0001001, 0b10, (outs 
GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3646,7 +3695,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (sra (opnode GPRnopc:$Rn, (sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; } } @@ -3749,6 +3798,19 @@ defm SMUA : AI_sdml<0, "smua">; defm SMUS : AI_sdml<1, "smus">; //===----------------------------------------------------------------------===// +// Division Instructions (ARMv7-A with virtualization extension) +// +def SDIV : ADivA1I<0b001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, + "sdiv", "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (sdiv GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasDivideInARM]>; + +def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, + "udiv", "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (udiv GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasDivideInARM]>; + +//===----------------------------------------------------------------------===// // Misc. Arithmetic Instructions. // @@ -3986,48 +4048,6 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; -// Conditional instructions -multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi, - Instruction irsr, - InstrItinClass iii, InstrItinClass iir, - InstrItinClass iis> { - def ri : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rfalse, GPR:$Rn, so_imm:$imm, - pred:$p, cc_out:$s), - 4, iii, [], - (iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def rr : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rfalse, GPR:$Rn, GPR:$Rm, - pred:$p, cc_out:$s), - 4, iir, [], - (irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def rsi : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rfalse, GPR:$Rn, so_reg_imm:$shift, - pred:$p, cc_out:$s), - 4, iis, [], - (irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPRnopc:$Rn, so_reg_reg:$shift, - pred:$p, cc_out:$s), - 4, iis, [], - (irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; -} - -defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm ADDCC : AsI1_bincc_irs<ADDri, ADDrr, ADDrsi, ADDrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm SUBCC : AsI1_bincc_irs<SUBri, SUBrr, SUBrsi, SUBrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; - } // neverHasSideEffects @@ -4723,21 +4743,13 @@ def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch), Requires<[IsARM, IsIOS]>; } -// eh.sjlj.dispatchsetup pseudo-instructions. -// These pseudos are used for both ARM and Thumb2. Any differences are -// handled when the pseudo is expanded (which happens before any passes -// that need the instruction size). -let Defs = - [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR, - Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ], - isBarrier = 1 in +// eh.sjlj.dispatchsetup pseudo-instruction. +// This pseudo is used for both ARM and Thumb. 
Any differences are handled when +// the pseudo is expanded (which happens before any passes that need the +// instruction size). +let isBarrier = 1 in def Int_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>; -let Defs = - [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ], - isBarrier = 1 in -def Int_eh_sjlj_dispatchsetup_nofp : PseudoInst<(outs), (ins), NoItinerary, []>; - //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -4841,32 +4853,32 @@ def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)), (SMULWB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), (sra (shl GPR:$b, (i32 16)), (i32 16)))), (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, sext_16_node:$b)), (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), (sra GPR:$b, (i32 16)))), (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))), (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul (sra GPR:$a, (i32 16)), (sra (shl GPR:$b, (i32 16)), (i32 16)))), (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), (i32 16))), (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (sra (mul GPR:$a, sext_16_node:$b), (i32 16))), (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td index 048d340..3cf213c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -398,6 +398,20 @@ def VecListFourQWordIndexed : Operand<i32> { let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } +def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 8; +}]>; +def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 8; +}]>; +def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() == 4; +}]>; +def word_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() == 4; +}]>; def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast<LoadSDNode>(N)->getAlignment() == 2; }]>; @@ -1980,7 +1994,7 @@ def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{5}; + let Inst{4} = Rn{4}; } def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, @@ -2023,7 +2037,7 @@ def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, 
post_truncsti16, NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{5}; + let Inst{4} = Rn{4}; } def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, extractelt, addrmode6oneL32> { @@ -2273,6 +2287,25 @@ def : Pat<(f64 (non_word_alignedload addrmode6:$addr)), def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr), (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>; +// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64 +// load / store if it's legal. +def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)), + (VLD1q64 addrmode6:$addr)>; +def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q64 addrmode6:$addr, QPR:$value)>; +def : Pat<(v2f64 (word_alignedload addrmode6:$addr)), + (VLD1q32 addrmode6:$addr)>; +def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q32 addrmode6:$addr, QPR:$value)>; +def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), + (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; +def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)), + (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr), + (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; + //===----------------------------------------------------------------------===// // NEON pattern fragments //===----------------------------------------------------------------------===// @@ -4455,10 +4488,36 @@ def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", [(set DPR:$Vd, (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; +def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1), + (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1), + (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1), + (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1), + (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1), + (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))), + (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), (and DPR:$Vm, (vnotd DPR:$Vd)))), - (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; + +def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd), + (and DPR:$Vm, (vnotd DPR:$Vd)))), + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON]>; def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), @@ -4467,9 +4526,35 @@ def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), [(set QPR:$Vd, (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; +def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1), + (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1), + (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1), + (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 
+ Requires<[HasNEON]>; +def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1), + (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1), + (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))), + (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; + def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd), (and QPR:$Vm, (vnotq QPR:$Vd)))), - (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; +def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd), + (and QPR:$Vm, (vnotq QPR:$Vd)))), + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON]>; // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", @@ -4983,7 +5068,8 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane), IIC_VMOVSI, "vmov", "32", "$R, $V$lane", [(set GPR:$R, (extractelt (v2i32 DPR:$V), - imm:$lane))]> { + imm:$lane))]>, + Requires<[HasNEON, HasFastVGETLNi32]> { let Inst{21} = lane{0}; } // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td @@ -5006,7 +5092,16 @@ def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane), def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src, (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane))>; + (SubReg_i32_lane imm:$lane))>, + Requires<[HasNEON, HasFastVGETLNi32]>; +def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane), + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, + Requires<[HasNEON, HasSlowVGETLNi32]>; +def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, + Requires<[HasNEON, HasSlowVGETLNi32]>; def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2), (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)), (SSubReg_f32_reg imm:$src2))>; @@ -5117,14 +5212,23 @@ class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty> def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>; def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>; -def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>; +def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>, + Requires<[HasNEON, HasFastVDUP32]>; def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; -def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>; +// NEONvdup patterns for uarchs with fast VDUP.32. +def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>, + Requires<[HasNEON,HasFastVDUP32]>; def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>; +// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead. +def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>, + Requires<[HasNEON,HasSlowVDUP32]>; +def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>, + Requires<[HasNEON,HasSlowVDUP32]>; + // VDUP : Vector Duplicate Lane (from scalar to all elements) class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt, @@ -5561,6 +5665,11 @@ def : N2VSPat<arm_ftoui, VCVTf2ud>; def : N2VSPat<arm_sitof, VCVTs2fd>; def : N2VSPat<arm_uitof, VCVTu2fd>; +// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers. 
+def : Pat<(f32 (bitconvert GPR:$a)), + (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>, + Requires<[HasNEON, DontUseVMOVSR]>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td index 554f6d9..ae7a5c0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -223,6 +223,7 @@ def t_addrmode_sp : Operand<i32>, def t_addrmode_pc : Operand<i32> { let EncoderMethod = "getAddrModePCOpValue"; let DecoderMethod = "DecodeThumbAddrModePC"; + let PrintMethod = "printThumbLdrLabelOperand"; } //===----------------------------------------------------------------------===// @@ -1200,6 +1201,7 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p), 2, IIC_iALUi, []>; +let hasSideEffects = 1 in def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), 2, IIC_iALUi, []>; @@ -1245,10 +1247,6 @@ def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch), [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, Requires<[IsThumb, IsIOS]>; -let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ], - isBarrier = 1 in -def tInt_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>; - //===----------------------------------------------------------------------===// // Non-Instruction Patterns // diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td index 8ecf009..002d64a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -159,7 +159,7 @@ def t2addrmode_imm12 : Operand<i32>, // t2ldrlabel := imm12 def t2ldrlabel : Operand<i32> { let EncoderMethod = "getAddrModeImm12OpValue"; - let PrintMethod = "printT2LdrLabelOperand"; + let PrintMethod = "printThumbLdrLabelOperand"; } def t2ldr_pcrel_imm12_asmoperand : AsmOperandClass {let Name = "MemPCRelImm12";} @@ -523,6 +523,23 @@ class T2MulLong<bits<3> opc22_20, bits<4> opc7_4, let Inst{7-4} = opc7_4; let Inst{3-0} = Rm; } +class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4, + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + bits<4> RdLo; + bits<4> RdHi; + bits<4> Rn; + bits<4> Rm; + + let Inst{31-23} = 0b111110111; + let Inst{22-20} = opc22_20; + let Inst{19-16} = Rn; + let Inst{15-12} = RdLo; + let Inst{11-8} = RdHi; + let Inst{7-4} = opc7_4; + let Inst{3-0} = Rm; +} /// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a @@ -757,33 +774,6 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, let Inst{24} = 1; let Inst{23-21} = op23_21; } - - // Predicated versions. 
- def CCri : t2PseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_imm:$imm, - pred:$p, cc_out:$s), 4, IIC_iALUi, [], - (!cast<Instruction>(NAME#ri) GPRnopc:$Rd, - GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def CCri12 : t2PseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPR:$Rn, imm0_4095:$imm, - pred:$p), - 4, IIC_iALUi, [], - (!cast<Instruction>(NAME#ri12) GPRnopc:$Rd, - GPR:$Rn, imm0_4095:$imm, pred:$p)>, - RegConstraint<"$Rfalse = $Rd">; - def CCrr : t2PseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPRnopc:$Rn, rGPR:$Rm, - pred:$p, cc_out:$s), 4, IIC_iALUr, [], - (!cast<Instruction>(NAME#rr) GPRnopc:$Rd, - GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def CCrs : t2PseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_reg:$Rm, - pred:$p, cc_out:$s), 4, IIC_iALUsi, [], - (!cast<Instruction>(NAME#rs) GPRnopc:$Rd, - GPRnopc:$Rn, t2_so_reg:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; } /// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns @@ -1200,6 +1190,7 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd), let neverHasSideEffects = 1, isReMaterializable = 1 in def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p), 4, IIC_iALUi, []>; +let hasSideEffects = 1 in def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), 4, IIC_iALUi, @@ -1962,7 +1953,7 @@ def : T2Pat<(ARMadde rGPR:$src, imm0_255_not:$imm, CPSR), def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR), (t2SBCri rGPR:$src, t2_so_imm_not:$imm)>; def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR), - (t2SBCrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>; + (t2SBCrr rGPR:$src, (t2MOVi16 (imm_not_XFORM imm:$imm)))>; // Select Bytes -- for disassembly only @@ -2405,7 +2396,8 @@ def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, def t2MLA: T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]> { + [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]>, + Requires<[IsThumb2, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -2415,7 +2407,8 @@ def t2MLA: T2FourReg< def t2MLS: T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]> { + [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]>, + Requires<[IsThumb2, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -2437,15 +2430,17 @@ def t2UMULL : T2MulLong<0b010, 0b0000, } // isCommutable // Multiply + accumulate -def t2SMLAL : T2MulLong<0b100, 0b0000, +def t2SMLAL : T2MlaLong<0b100, 0b0000, (outs rGPR:$RdLo, rGPR:$RdHi), - (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, - "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>; + (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64, + "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">; -def t2UMLAL : T2MulLong<0b110, 0b0000, +def t2UMLAL : T2MlaLong<0b110, 0b0000, (outs rGPR:$RdLo, rGPR:$RdHi), - (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, - "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>; + (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64, + "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">; 
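// Aside: SMLAL/UMLAL are read-modify-write on the 64-bit accumulator,
// roughly {RdHi:RdLo} = {RdHi:RdLo} + Rn * Rm, which is why the T2MlaLong
// form above lists $RLo/$RHi as extra source operands. The RegConstraint
// string ties each input half to the matching output half, so the register
// allocator keeps the accumulator in the same registers across the
// instruction.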
def t2UMAAL : T2MulLong<0b110, 0b0110, (outs rGPR:$RdLo, rGPR:$RdHi), @@ -2482,7 +2477,7 @@ def t2SMMLA : T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -2503,7 +2498,7 @@ def t2SMMLS: T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b110; @@ -2608,7 +2603,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16), (sext_inreg rGPR:$Rm, i16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2621,7 +2616,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16), (sra rGPR:$Rm, (i32 16)))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2634,7 +2629,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)), (sext_inreg rGPR:$Rm, i16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2647,7 +2642,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)), (sra rGPR:$Rm, (i32 16)))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2660,7 +2655,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn, (sext_inreg rGPR:$Rm, i16)), (i32 16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2673,7 +2668,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn, (sra rGPR:$Rm, (i32 16))), (i32 16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2767,7 +2762,7 @@ def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd), // Division Instructions. 
// Signed and unsigned division on v7-M // -def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi, +def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "sdiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>, Requires<[HasDivide, IsThumb2]> { @@ -2778,7 +2773,7 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi, let Inst{7-4} = 0b1111; } -def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi, +def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "udiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>, Requires<[HasDivide, IsThumb2]> { @@ -3049,37 +3044,6 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd), RegConstraint<"$false = $Rd">; } // isCodeGenOnly = 1 -multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs, - InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { - // shifted imm - def ri : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_imm:$imm, - pred:$p, cc_out:$s), - 4, iii, [], - (iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - // register - def rr : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rfalse, rGPR:$Rn, rGPR:$Rm, - pred:$p, cc_out:$s), - 4, iir, [], - (irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - // shifted register - def rs : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_reg:$ShiftedRm, - pred:$p, cc_out:$s), - 4, iis, [], - (irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; -} // T2I_bincc_irs - -defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs, - IIC_iBITi, IIC_iBITr, IIC_iBITsi>; -defm t2ORRCC : T2I_bincc_irs<t2ORRri, t2ORRrr, t2ORRrs, - IIC_iBITi, IIC_iBITr, IIC_iBITsi>; -defm t2EORCC : T2I_bincc_irs<t2EORri, t2EORrr, t2EORrs, - IIC_iBITi, IIC_iBITr, IIC_iBITsi>; } // neverHasSideEffects //===----------------------------------------------------------------------===// @@ -3281,11 +3245,11 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br, let Inst{15-14} = 0b10; let Inst{12} = 1; - bits<20> target; + bits<24> target; let Inst{26} = target{19}; let Inst{11} = target{18}; let Inst{13} = target{17}; - let Inst{21-16} = target{16-11}; + let Inst{25-16} = target{20-11}; let Inst{10-0} = target{10-0}; let DecoderMethod = "DecodeT2BInstruction"; } @@ -3367,20 +3331,6 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { Requires<[IsThumb2, IsIOS]>; } -let isCall = 1, Defs = [LR], Uses = [SP] in { - // mov lr, pc; b if callee is marked noreturn to avoid confusing the - // return stack predictor. 
- def t2BMOVPCB_CALL : tPseudoInst<(outs), - (ins t_bltarget:$func), - 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsThumb]>; -} - -// Direct calls -def : T2Pat<(ARMcall_nolink texternalsym:$func), - (t2BMOVPCB_CALL texternalsym:$func)>, - Requires<[IsThumb]>; - // IT block let Defs = [ITSTATE] in def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td index eb7eaa6..b5a896c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -450,11 +450,11 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; -def : ARMPat<(f32_to_f16 SPR:$a), - (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; +def : Pat<(f32_to_f16 SPR:$a), + (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; -def : ARMPat<(f16_to_f32 GPR:$a), - (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; +def : Pat<(f16_to_f32 GPR:$a), + (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", @@ -523,10 +523,12 @@ def VMOVRS : AVConv2I<0b11100001, 0b1010, let D = VFPNeonDomain; } +// Bitcast i32 -> f32. NEON prefers to use VMOVDRR. def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$Sn), (ins GPR:$Rt), IIC_fpMOVIS, "vmov", "\t$Sn, $Rt", - [(set SPR:$Sn, (bitconvert GPR:$Rt))]> { + [(set SPR:$Sn, (bitconvert GPR:$Rt))]>, + Requires<[HasVFP2, UseVMOVSR]> { // Instruction operands. bits<5> Sn; bits<4> Rt; diff --git a/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp index 3f99cce..254d8f6 100644 --- a/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp @@ -168,7 +168,7 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, intptr_t LazyPtr = getIndirectSymAddr(Fn); if (!LazyPtr) { // In PIC mode, the function stub is loading a lazy-ptr. 
- LazyPtr= (intptr_t)emitGlobalValueIndirectSym((GlobalValue*)F, Fn, JCE); + LazyPtr= (intptr_t)emitGlobalValueIndirectSym((const GlobalValue*)F, Fn, JCE); DEBUG(if (F) errs() << "JIT: Indirect symbol emitted at [" << LazyPtr << "] for GV '" << F->getName() << "'\n"; diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 897ceb6..0185289 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -27,7 +27,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -1448,7 +1448,7 @@ namespace { static char ID; ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} - const TargetData *TD; + const DataLayout *TD; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const ARMSubtarget *STI; @@ -1478,7 +1478,7 @@ namespace { } bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - TD = Fn.getTarget().getTargetData(); + TD = Fn.getTarget().getDataLayout(); TII = Fn.getTarget().getInstrInfo(); TRI = Fn.getTarget().getRegisterInfo(); STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index f1c8fc8..c0ac04b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -108,6 +108,11 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// pass. DenseMap<unsigned, unsigned> CPEClones; + /// GlobalBaseReg - keeps track of the virtual register initialized for + /// use as the global base register. This is used for PIC in some PIC + /// relocation models. 
+  unsigned GlobalBaseReg;
+
 public:
   ARMFunctionInfo() :
     isThumb(false),
@@ -119,7 +124,7 @@ public:
     GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
     NumAlignedDPRCS2Regs(0),
     JumpTableUId(0), PICLabelUId(0),
-    VarArgsFrameIndex(0), HasITBlocks(false) {}
+    VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
 
   explicit ARMFunctionInfo(MachineFunction &MF) :
     isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
@@ -130,7 +135,7 @@ public:
     GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
     GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
     JumpTableUId(0), PICLabelUId(0),
-    VarArgsFrameIndex(0), HasITBlocks(false) {}
+    VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
 
   bool isThumbFunction() const { return isThumb; }
   bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
@@ -249,6 +254,9 @@ public:
   bool hasITBlocks() const { return HasITBlocks; }
   void setHasITBlocks(bool h) { HasITBlocks = h; }
 
+  unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+  void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
   void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
     if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
       assert(0 && "Duplicate entries!");
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
index 6f974fd..b0f576b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -49,6 +49,9 @@ def ssub_0 : SubRegIndex;
 def ssub_1 : SubRegIndex;
 def ssub_2 : SubRegIndex<[dsub_1, ssub_0]>;
 def ssub_3 : SubRegIndex<[dsub_1, ssub_1]>;
+
+def gsub_0 : SubRegIndex;
+def gsub_1 : SubRegIndex;
 // Let TableGen synthesize the remaining 12 ssub_* indices.
 // We don't need to name them.
 }
@@ -247,11 +250,16 @@ def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
 }
 
 // Scalar single precision floating point register class.
-def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>;
+// FIXME: Allocation order changed to s0, s2, s4, ... as a quick hack to
+// avoid partial-write dependencies on D registers (S registers are
+// renamed as portions of D registers).
+def SPR : RegisterClass<"ARM", [f32], 32, (add (decimate
+                                                (sequence "S%u", 0, 31), 2),
+                                               (sequence "S%u", 0, 31))>;
 
 // Subset of SPR which can be used as a source of NEON scalars for 16-bit
 // operations
-def SPR_8 : RegisterClass<"ARM", [f32], 32, (trunc SPR, 16)>;
+def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>;
 
 // Scalar double precision floating point / generic 64-bit vector register
 // class.
@@ -308,6 +316,17 @@ def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
   let AltOrderSelect = [{ return 1; }];
 }
 
+// Pseudo-registers representing even-odd pairs of GPRs from R0 to R13/SP.
+// These are needed by instructions (e.g. ldrexd/strexd) requiring even-odd GPRs.
+def Tuples2R : RegisterTuples<[gsub_0, gsub_1],
+                              [(add R0, R2, R4, R6, R8, R10, R12),
+                               (add R1, R3, R5, R7, R9, R11, SP)]>;
+
+// Register class representing a pair of even-odd GPRs.
+def GPRPair : RegisterClass<"ARM", [untyped], 64, (add Tuples2R)> {
+  let Size = 64; // 2 x 32 bits, we have no predefined type of that size.
+}
+
 // Pseudo-registers representing 3 consecutive D registers.
def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2], [(shl DPR, 0), diff --git a/contrib/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm/lib/Target/ARM/ARMSchedule.td index 81d2fa3..02196d0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSchedule.td +++ b/contrib/llvm/lib/Target/ARM/ARMSchedule.td @@ -55,6 +55,7 @@ def IIC_iMUL32 : InstrItinClass; def IIC_iMAC32 : InstrItinClass; def IIC_iMUL64 : InstrItinClass; def IIC_iMAC64 : InstrItinClass; +def IIC_iDIV : InstrItinClass; def IIC_iLoad_i : InstrItinClass; def IIC_iLoad_r : InstrItinClass; def IIC_iLoad_si : InstrItinClass; @@ -261,3 +262,4 @@ def IIC_VTBX4 : InstrItinClass; include "ARMScheduleV6.td" include "ARMScheduleA8.td" include "ARMScheduleA9.td" +include "ARMScheduleSwift.td" diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td index 7bc590f..404634f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td @@ -1876,8 +1876,9 @@ def CortexA9Itineraries : ProcessorItineraries< ]>; // ===---------------------------------------------------------------------===// -// This following definitions describe the simple machine model which -// will replace itineraries. +// The following definitions describe the simpler per-operand machine model. +// This works with MachineScheduler and will eventually replace itineraries. + // Cortex-A9 machine model for scheduling and other instruction cost heuristics. def CortexA9Model : SchedMachineModel { @@ -1891,5 +1892,595 @@ def CortexA9Model : SchedMachineModel { let Itineraries = CortexA9Itineraries; } -// TODO: Add Cortex-A9 processor and scheduler resources. +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available. + +def A9UnitALU : ProcResource<2>; +def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; } +def A9UnitAGU : ProcResource<1>; +def A9UnitLS : ProcResource<1>; +def A9UnitFP : ProcResource<1> { let Buffered = 0; } +def A9UnitB : ProcResource<1>; + +//===----------------------------------------------------------------------===// +// Define scheduler read/write types with their resources and latency on A9. + +// Consume an issue slot, but no processor resources. This is useful when all +// other writes associated with the operand have NumMicroOps = 0. +def A9WriteIssue : SchedWriteRes<[]> { let Latency = 0; } + +// Write an integer register. +def A9WriteI : SchedWriteRes<[A9UnitALU]>; +// Write an integer shifted-by register +def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; } + +// Basic ALU. +def A9WriteA : SchedWriteRes<[A9UnitALU]>; +// ALU with operand shifted by immediate. +def A9WriteAsi : SchedWriteRes<[A9UnitALU]> { let Latency = 2; } +// ALU with operand shifted by register. +def A9WriteAsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; } + +// Multiplication +def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; } +def A9WriteMHi : SchedWriteRes<[A9UnitMul]> { let Latency = 5; + let NumMicroOps = 0; } +def A9WriteM16 : SchedWriteRes<[A9UnitMul]> { let Latency = 3; } +def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4; + let NumMicroOps = 0; } + +// Floating-point +// Only one FP or AGU instruction may issue per cycle. We model this +// by having FP instructions consume the AGU resource. 
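// Aside: an illustrative sketch (hypothetical Ex* names, not from the
// patch) of the idiom used below. A SchedWriteRes reserves all of its
// listed units in the same cycle, so making every FP write also consume
// the AGU unit means an FP op and an address-generation op can never
// issue together:
//   def ExUnitFP  : ProcResource<1> { let Buffered = 0; }
//   def ExUnitAGU : ProcResource<1>;
//   def ExWriteF  : SchedWriteRes<[ExUnitFP, ExUnitAGU]> { let Latency = 4; }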
+def A9WriteF : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; } +def A9WriteFMov : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; } +def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; } +def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; } +def A9WriteFMAS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; } +def A9WriteFMAD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; } +def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; } +def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; } +def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 17; } +def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 32; } + +// NEON has an odd mix of latencies. Simply name the write types by latency. +def A9WriteV1 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; } +def A9WriteV2 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 2; } +def A9WriteV3 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 3; } +def A9WriteV4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; } +def A9WriteV5 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; } +def A9WriteV6 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; } +def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; } +def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; } +def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; } + +// Reserve A9UnitFP for 2 consecutive cycles. +def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { + let Latency = 4; + let ResourceCycles = [2]; +} +def A9Write2V7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { + let Latency = 7; + let ResourceCycles = [2]; +} +def A9Write2V9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { + let Latency = 9; + let ResourceCycles = [2]; +} + +// Branches don't have a def operand but still consume resources. +def A9WriteB : SchedWriteRes<[A9UnitB]>; + +// Address generation. +def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; } + +// Load Integer. +def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; } +// Load the upper 32-bits using the same micro-op. +def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3; + let NumMicroOps = 0; } +// Offset shifted by register. +def A9WriteLsi : SchedWriteRes<[A9UnitLS]> { let Latency = 4; } +// Load (and zero extend) a byte. +def A9WriteLb : SchedWriteRes<[A9UnitLS]> { let Latency = 4; } +def A9WriteLbsi : SchedWriteRes<[A9UnitLS]> { let Latency = 5; } + +// Load or Store Float, aligned. +def A9WriteLSfp : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 1; } + +// Store Integer. +def A9WriteS : SchedWriteRes<[A9UnitLS]>; + +//===----------------------------------------------------------------------===// +// Define resources dynamically for load multiple variants. + +// Define helpers for extra latency without consuming resources. +def A9WriteCycle1 : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; } +foreach NumCycles = 2-8 in { +def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>; +} // foreach NumCycles + +// Define TII for use in SchedVariant Predicates. +def : PredicateProlog<[{ + const ARMBaseInstrInfo *TII = + static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo()); + (void)TII; +}]>; + +// Define address generation sequences and predicates for 8 flavors of LDMs. 
+foreach NumAddr = 1-8 in { + +// Define A9WriteAdr1-8 as a sequence of A9WriteAdr with additive +// latency for instructions that generate multiple loads or stores. +def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>; + +// Define a predicate to select the LDM based on number of memory addresses. +def A9LMAdr#NumAddr#Pred : + SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>; + +} // foreach NumAddr + +// Fall-back for unknown LDMs. +def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(MI) == 0">; + +// LDM/VLDM/VLDn address generation latency & resources. +// Dynamically select the A9WriteAdrN sequence using a predicate. +def A9WriteLMAdr : SchedWriteVariant<[ + SchedVar<A9LMAdr1Pred, [A9WriteAdr1]>, + SchedVar<A9LMAdr2Pred, [A9WriteAdr2]>, + SchedVar<A9LMAdr3Pred, [A9WriteAdr3]>, + SchedVar<A9LMAdr4Pred, [A9WriteAdr4]>, + SchedVar<A9LMAdr5Pred, [A9WriteAdr5]>, + SchedVar<A9LMAdr6Pred, [A9WriteAdr6]>, + SchedVar<A9LMAdr7Pred, [A9WriteAdr7]>, + SchedVar<A9LMAdr8Pred, [A9WriteAdr8]>, + // For unknown LDM/VLDM/VSTM, assume 2 32-bit registers. + SchedVar<A9LMUnknownPred, [A9WriteAdr2]>]>; + +// Define LDM Resources. +// These take no issue resource, so they can be combined with other +// writes like WriteB. +// A9WriteLMLo takes a single LS resource and 2 cycles. +def A9WriteLMLo : SchedWriteRes<[A9UnitLS]> { let Latency = 2; + let NumMicroOps = 0; } +// Assuming aligned access, the upper half of each pair is free with +// the same latency. +def A9WriteLMHi : SchedWriteRes<[]> { let Latency = 2; + let NumMicroOps = 0; } +// Each A9WriteL#N variant adds N cycles of latency without consuming +// additional resources. +foreach NumAddr = 1-8 in { +def A9WriteL#NumAddr : WriteSequence< + [A9WriteLMLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>; +def A9WriteL#NumAddr#Hi : WriteSequence< + [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>; +} + +//===----------------------------------------------------------------------===// +// LDM: Load multiple into 32-bit integer registers. + +// A9WriteLM variants expand into a pair of writes for each 64-bit +// value loaded. When the number of registers is odd, the last +// A9WriteLnHi is naturally ignored because the instruction has no +// following def operands. These variants take no issue resource, so +// they may need to be part of a WriteSequence that includes A9WriteIssue. 
+def A9WriteLM : SchedWriteVariant<[
+  SchedVar<A9LMAdr1Pred, [A9WriteL1, A9WriteL1Hi]>,
+  SchedVar<A9LMAdr2Pred, [A9WriteL1, A9WriteL1Hi,
+                          A9WriteL2, A9WriteL2Hi]>,
+  SchedVar<A9LMAdr3Pred, [A9WriteL1, A9WriteL1Hi,
+                          A9WriteL2, A9WriteL2Hi,
+                          A9WriteL3, A9WriteL3Hi]>,
+  SchedVar<A9LMAdr4Pred, [A9WriteL1, A9WriteL1Hi,
+                          A9WriteL2, A9WriteL2Hi,
+                          A9WriteL3, A9WriteL3Hi,
+                          A9WriteL4, A9WriteL4Hi]>,
+  SchedVar<A9LMAdr5Pred, [A9WriteL1, A9WriteL1Hi,
+                          A9WriteL2, A9WriteL2Hi,
+                          A9WriteL3, A9WriteL3Hi,
+                          A9WriteL4, A9WriteL4Hi,
+                          A9WriteL5, A9WriteL5Hi]>,
+  SchedVar<A9LMAdr6Pred, [A9WriteL1, A9WriteL1Hi,
+                          A9WriteL2, A9WriteL2Hi,
+                          A9WriteL3, A9WriteL3Hi,
+                          A9WriteL4, A9WriteL4Hi,
+                          A9WriteL5, A9WriteL5Hi,
+                          A9WriteL6, A9WriteL6Hi]>,
+  SchedVar<A9LMAdr7Pred, [A9WriteL1, A9WriteL1Hi,
+                          A9WriteL2, A9WriteL2Hi,
+                          A9WriteL3, A9WriteL3Hi,
+                          A9WriteL4, A9WriteL4Hi,
+                          A9WriteL5, A9WriteL5Hi,
+                          A9WriteL6, A9WriteL6Hi,
+                          A9WriteL7, A9WriteL7Hi]>,
+  SchedVar<A9LMAdr8Pred, [A9WriteL1, A9WriteL1Hi,
+                          A9WriteL2, A9WriteL2Hi,
+                          A9WriteL3, A9WriteL3Hi,
+                          A9WriteL4, A9WriteL4Hi,
+                          A9WriteL5, A9WriteL5Hi,
+                          A9WriteL6, A9WriteL6Hi,
+                          A9WriteL7, A9WriteL7Hi,
+                          A9WriteL8, A9WriteL8Hi]>,
+  // For unknown LDMs, define the maximum number of writes, but only
+  // make the first two consume resources.
+  SchedVar<A9LMUnknownPred, [A9WriteL1, A9WriteL1Hi,
+                             A9WriteL2, A9WriteL2Hi,
+                             A9WriteL3Hi, A9WriteL3Hi,
+                             A9WriteL4Hi, A9WriteL4Hi,
+                             A9WriteL5Hi, A9WriteL5Hi,
+                             A9WriteL6Hi, A9WriteL6Hi,
+                             A9WriteL7Hi, A9WriteL7Hi,
+                             A9WriteL8Hi, A9WriteL8Hi]>]> {
+  let Variadic = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// VFP Load/Store Multiple Variants, and NEON VLDn/VSTn support.
+
+// A9WriteLfpOp is the same as A9WriteLSfp but takes no issue resources,
+// so it can be used in WriteSequences in single-issue instructions that
+// encapsulate multiple loads.
+def A9WriteLfpOp : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
+  let Latency = 1;
+  let NumMicroOps = 0;
+}
+
+foreach NumAddr = 1-8 in {
+
+// Helper for A9WriteLfp1-8: A sequence of fp loads with no micro-ops.
+def A9WriteLfp#NumAddr#Seq : WriteSequence<[A9WriteLfpOp], NumAddr>;
+
+// A9WriteLfp1-8 definitions are statically expanded into a sequence of
+// A9WriteLfpOps with additive latency that takes a single issue slot.
+// Used directly to describe NEON VLDn.
+def A9WriteLfp#NumAddr : WriteSequence<
+  [A9WriteIssue, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
+
+// A9WriteLfp1-8Mov adds a cycle of latency and FP resource for
+// permuting loaded values.
+def A9WriteLfp#NumAddr#Mov : WriteSequence<
+  [A9WriteF, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
+
+} // foreach NumAddr
+
+// Define VLDM/VSTM PreRA resources.
+// A9WriteLMfpPreRA is dynamically expanded into the correct
+// A9WriteLfp1-8 sequence based on a predicate. This supports the
+// preRA VLDM variants in which all 64-bit loads are written to the
+// same tuple of either single or double precision registers.
+def A9WriteLMfpPreRA : SchedWriteVariant<[
+  SchedVar<A9LMAdr1Pred, [A9WriteLfp1]>,
+  SchedVar<A9LMAdr2Pred, [A9WriteLfp2]>,
+  SchedVar<A9LMAdr3Pred, [A9WriteLfp3]>,
+  SchedVar<A9LMAdr4Pred, [A9WriteLfp4]>,
+  SchedVar<A9LMAdr5Pred, [A9WriteLfp5]>,
+  SchedVar<A9LMAdr6Pred, [A9WriteLfp6]>,
+  SchedVar<A9LMAdr7Pred, [A9WriteLfp7]>,
+  SchedVar<A9LMAdr8Pred, [A9WriteLfp8]>,
+  // For unknown VLDM/VSTM PreRA, assume 2xS registers.
+  SchedVar<A9LMUnknownPred, [A9WriteLfp2]>]>;
+
+// Define VLDM/VSTM PostRA Resources.
+// A9WriteLMfpLo takes a LS and FP resource and one issue slot but no latency. +def A9WriteLMfpLo : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 0; } + +foreach NumAddr = 1-8 in { + +// Each A9WriteL#N variant adds N cycles of latency without consuming +// additional resources. +def A9WriteLMfp#NumAddr : WriteSequence< + [A9WriteLMfpLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>; + +// Assuming aligned access, the upper half of each pair is free with +// the same latency. +def A9WriteLMfp#NumAddr#Hi : WriteSequence< + [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>; + +} // foreach NumAddr + +// VLDM PostRA Variants. These variants expand A9WriteLMfpPostRA into a +// pair of writes for each 64-bit data loaded. When the number of +// registers is odd, the last WriteLMfpnHi is naturally ignored because +// the instruction has no following def operands. +def A9WriteLMfpPostRA : SchedWriteVariant<[ + SchedVar<A9LMAdr1Pred, [A9WriteLMfp1, A9WriteLMfp1Hi]>, + SchedVar<A9LMAdr2Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, + A9WriteLMfp2, A9WriteLMfp2Hi]>, + SchedVar<A9LMAdr3Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, + A9WriteLMfp2, A9WriteLMfp2Hi, + A9WriteLMfp3, A9WriteLMfp3Hi]>, + SchedVar<A9LMAdr4Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, + A9WriteLMfp2, A9WriteLMfp2Hi, + A9WriteLMfp3, A9WriteLMfp3Hi, + A9WriteLMfp4, A9WriteLMfp4Hi]>, + SchedVar<A9LMAdr5Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, + A9WriteLMfp2, A9WriteLMfp2Hi, + A9WriteLMfp3, A9WriteLMfp3Hi, + A9WriteLMfp4, A9WriteLMfp4Hi, + A9WriteLMfp5, A9WriteLMfp5Hi]>, + SchedVar<A9LMAdr6Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, + A9WriteLMfp2, A9WriteLMfp2Hi, + A9WriteLMfp3, A9WriteLMfp3Hi, + A9WriteLMfp4, A9WriteLMfp4Hi, + A9WriteLMfp5, A9WriteLMfp5Hi, + A9WriteLMfp6, A9WriteLMfp6Hi]>, + SchedVar<A9LMAdr7Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, + A9WriteLMfp2, A9WriteLMfp2Hi, + A9WriteLMfp3, A9WriteLMfp3Hi, + A9WriteLMfp4, A9WriteLMfp4Hi, + A9WriteLMfp5, A9WriteLMfp5Hi, + A9WriteLMfp6, A9WriteLMfp6Hi, + A9WriteLMfp7, A9WriteLMfp7Hi]>, + SchedVar<A9LMAdr8Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, + A9WriteLMfp2, A9WriteLMfp2Hi, + A9WriteLMfp3, A9WriteLMfp3Hi, + A9WriteLMfp4, A9WriteLMfp4Hi, + A9WriteLMfp5, A9WriteLMfp5Hi, + A9WriteLMfp6, A9WriteLMfp6Hi, + A9WriteLMfp7, A9WriteLMfp7Hi, + A9WriteLMfp8, A9WriteLMfp8Hi]>, + // For unknown LDMs, define the maximum number of writes, but only + // make the first two consume resources. + SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp1Hi, + A9WriteLMfp2, A9WriteLMfp2Hi, + A9WriteLMfp3Hi, A9WriteLMfp3Hi, + A9WriteLMfp4Hi, A9WriteLMfp4Hi, + A9WriteLMfp5Hi, A9WriteLMfp5Hi, + A9WriteLMfp6Hi, A9WriteLMfp6Hi, + A9WriteLMfp7Hi, A9WriteLMfp7Hi, + A9WriteLMfp8Hi, A9WriteLMfp8Hi]>]> { + let Variadic = 1; +} + +// Distinguish between our multiple MI-level forms of the same +// VLDM/VSTM instructions. +def A9PreRA : SchedPredicate< + "TargetRegisterInfo::isVirtualRegister(MI->getOperand(0).getReg())">; +def A9PostRA : SchedPredicate< + "TargetRegisterInfo::isPhysicalRegister(MI->getOperand(0).getReg())">; + +// VLDM represents all destination registers as a single register +// tuple, unlike LDM. So the number of write operands is not variadic. +def A9WriteLMfp : SchedWriteVariant<[ + SchedVar<A9PreRA, [A9WriteLMfpPreRA]>, + SchedVar<A9PostRA, [A9WriteLMfpPostRA]>]>; + +//===----------------------------------------------------------------------===// +// Resources for other (non LDM/VLDM) Variants. + +// These mov immediate writers are unconditionally expanded with +// additive latency. 
+def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>;
+def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, A9WriteA]>;
+def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;
+
+// Some ALU operations can read loaded integer values one cycle early.
+def A9ReadA : SchedReadAdvance<1,
+  [A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
+   A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
+   A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
+   A9WriteL1Hi, A9WriteL2Hi, A9WriteL3Hi, A9WriteL4Hi,
+   A9WriteL5Hi, A9WriteL6Hi, A9WriteL7Hi, A9WriteL8Hi]>;
+
+// Read types for operands that are unconditionally read in cycle N
+// after the instruction issues; they decrease producer latency by N-1.
+def A9Read2 : SchedReadAdvance<1>;
+def A9Read3 : SchedReadAdvance<2>;
+def A9Read4 : SchedReadAdvance<3>;
+
+//===----------------------------------------------------------------------===//
+// Map itinerary classes to scheduler read/write resources per operand.
+//
+// For ARM, we piggyback scheduler resources on the Itinerary classes
+// to avoid perturbing the existing instruction definitions.
+
+// This table follows the ARM Cortex-A9 Technical Reference Manuals,
+// mostly in order.
+let SchedModel = CortexA9Model in {
+
+def :ItinRW<[A9WriteI], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
+                         IIC_iMVNi,IIC_iMVNsi,
+                         IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>;
+def :ItinRW<[A9WriteI,A9ReadA],[IIC_iMVNr]>;
+def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>;
+
+def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>;
+def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
+def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;
+
+def :ItinRW<[A9WriteA], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
+def :ItinRW<[A9WriteA, A9ReadA], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
+def :ItinRW<[A9WriteA, A9ReadA, A9ReadA],[IIC_iALUr,IIC_iCMPr]>;
+def :ItinRW<[A9WriteAsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
+def :ItinRW<[A9WriteAsi, A9ReadA], [IIC_iALUsi]>;
+def :ItinRW<[A9WriteAsi, ReadDefault, A9ReadA], [IIC_iALUsir]>; // RSB
+def :ItinRW<[A9WriteAsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
+def :ItinRW<[A9WriteAsr, A9ReadA], [IIC_iALUsr,IIC_iCMPsr]>;
+
+// A9WriteMHi ignored for MUL32.
+def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32,
+                                     IIC_iMUL64,IIC_iMAC64]>;
+// FIXME: SMLALxx needs itin classes
+def :ItinRW<[A9WriteM16, A9WriteM16Hi], [IIC_iMUL16,IIC_iMAC16]>;
+
+// TODO: For floating-point ops, we model the pipeline forwarding
+// latencies here. WAW latencies are sometimes longer.
+
+def :ItinRW<[A9WriteFMov], [IIC_fpSTAT, IIC_fpMOVIS, IIC_fpMOVID, IIC_fpMOVSI,
+                            IIC_fpUNA32, IIC_fpUNA64,
+                            IIC_fpCMP32, IIC_fpCMP64]>;
+def :ItinRW<[A9WriteFMov, A9WriteFMov], [IIC_fpMOVDI]>;
+def :ItinRW<[A9WriteF], [IIC_fpCVTSD, IIC_fpCVTDS, IIC_fpCVTSH, IIC_fpCVTHS,
+                         IIC_fpCVTIS, IIC_fpCVTID, IIC_fpCVTSI, IIC_fpCVTDI,
+                         IIC_fpALU32, IIC_fpALU64]>;
+def :ItinRW<[A9WriteFMulS], [IIC_fpMUL32]>;
+def :ItinRW<[A9WriteFMulD], [IIC_fpMUL64]>;
+def :ItinRW<[A9WriteFMAS], [IIC_fpMAC32]>;
+def :ItinRW<[A9WriteFMAD], [IIC_fpMAC64]>;
+def :ItinRW<[A9WriteFDivS], [IIC_fpDIV32]>;
+def :ItinRW<[A9WriteFDivD], [IIC_fpDIV64]>;
+def :ItinRW<[A9WriteFSqrtS], [IIC_fpSQRT32]>;
+def :ItinRW<[A9WriteFSqrtD], [IIC_fpSQRT64]>;
+
+def :ItinRW<[A9WriteB], [IIC_Br]>;
+
+// A9 PLD is processed in a dedicated unit.
+def :ItinRW<[], [IIC_Preload]>;
+
+// Note: We must assume that loads are aligned, since the machine
+// model cannot know this statically and A9 ignores alignment hints.
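// Aside: an illustrative sketch (hypothetical Ex* names, not from the
// patch) of how the SchedReadAdvance read types above interact with the
// write types. A read advance of N shortens the producer's effective
// latency by N along that edge:
//   def ExWriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; }
//   def ExReadA  : SchedReadAdvance<1, [ExWriteL]>;
// An ALU operand mapped to ExReadA sees an ExWriteL result after
// 3 - 1 = 2 cycles, i.e. it reads the load "one cycle early".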
+
+// A9WriteAdr consumes the AGU regardless of address writeback, but its
+// latency is only relevant for users of an updated address.
+def :ItinRW<[A9WriteL, A9WriteAdr], [IIC_iLoad_i,IIC_iLoad_r,
+                                     IIC_iLoad_iu,IIC_iLoad_ru]>;
+def :ItinRW<[A9WriteLsi, A9WriteAdr], [IIC_iLoad_si,IIC_iLoad_siu]>;
+def :ItinRW<[A9WriteLb, A9WriteAdr2], [IIC_iLoad_bh_i,IIC_iLoad_bh_r,
+                                       IIC_iLoad_bh_iu,IIC_iLoad_bh_ru]>;
+def :ItinRW<[A9WriteLbsi, A9WriteAdr2], [IIC_iLoad_bh_si,IIC_iLoad_bh_siu]>;
+def :ItinRW<[A9WriteL, A9WriteLHi, A9WriteAdr], [IIC_iLoad_d_i,IIC_iLoad_d_r,
+                                                 IIC_iLoad_d_ru]>;
+// Store either has no def operands, or the one def for address writeback.
+def :ItinRW<[A9WriteAdr, A9WriteS], [IIC_iStore_i, IIC_iStore_r,
+                                     IIC_iStore_iu, IIC_iStore_ru,
+                                     IIC_iStore_d_i, IIC_iStore_d_r,
+                                     IIC_iStore_d_ru]>;
+def :ItinRW<[A9WriteAdr2, A9WriteS], [IIC_iStore_si, IIC_iStore_siu,
+                                      IIC_iStore_bh_i, IIC_iStore_bh_r,
+                                      IIC_iStore_bh_iu, IIC_iStore_bh_ru]>;
+def :ItinRW<[A9WriteAdr3, A9WriteS], [IIC_iStore_bh_si, IIC_iStore_bh_siu]>;
+
+// A9WriteLM will be expanded into a separate write for each def
+// operand. Address generation consumes resources, but A9WriteLMAdr
+// is listed after all def operands, so it has no effective latency.
+//
+// Note: A9WriteLM expands into an even number of def operands. The
+// actual number of def operands may be less by one.
+def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteIssue], [IIC_iLoad_m, IIC_iPop]>;
+
+// Load multiple with address writeback has an extra def operand in
+// front of the loaded registers.
+//
+// Reuse the load-multiple variants for store-multiple because the
+// resources are identical. For stores, only the address writeback
+// has a def operand, so the WriteL latencies are unused.
+def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu,
+                                                      IIC_iStore_m,
+                                                      IIC_iStore_mu]>;
+def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>;
+def :ItinRW<[A9WriteL, A9WriteAdr, A9WriteA], [IIC_iLoadiALU]>;
+
+def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;
+
+def :ItinRW<[A9WriteLMfp, A9WriteLMAdr], [IIC_fpLoad_m]>;
+def :ItinRW<[A9WriteLMAdr, A9WriteLMfp], [IIC_fpLoad_mu]>;
+def :ItinRW<[A9WriteAdr, A9WriteLSfp], [IIC_fpStore32, IIC_fpStore64,
+                                        IIC_fpStore_m, IIC_fpStore_mu]>;
+
+// Note: Unlike VLDM, VLD1 expects the writeback operand after the
+// normal writes.
+def :ItinRW<[A9WriteLfp1, A9WriteAdr1], [IIC_VLD1, IIC_VLD1u,
+                                         IIC_VLD1x2, IIC_VLD1x2u]>;
+def :ItinRW<[A9WriteLfp2, A9WriteAdr2], [IIC_VLD1x3, IIC_VLD1x3u,
+                                         IIC_VLD1x4, IIC_VLD1x4u,
+                                         IIC_VLD4dup, IIC_VLD4dupu]>;
+def :ItinRW<[A9WriteLfp1Mov, A9WriteAdr1], [IIC_VLD1dup, IIC_VLD1dupu,
+                                            IIC_VLD2, IIC_VLD2u,
+                                            IIC_VLD2dup, IIC_VLD2dupu]>;
+def :ItinRW<[A9WriteLfp2Mov, A9WriteAdr1], [IIC_VLD1ln, IIC_VLD1lnu,
+                                            IIC_VLD2x2, IIC_VLD2x2u,
+                                            IIC_VLD2ln, IIC_VLD2lnu]>;
+def :ItinRW<[A9WriteLfp3Mov, A9WriteAdr3], [IIC_VLD3, IIC_VLD3u,
+                                            IIC_VLD3dup, IIC_VLD3dupu]>;
+def :ItinRW<[A9WriteLfp4Mov, A9WriteAdr4], [IIC_VLD4, IIC_VLD4u,
+                                            IIC_VLD4ln, IIC_VLD4lnu]>;
+def :ItinRW<[A9WriteLfp5Mov, A9WriteAdr5], [IIC_VLD3ln, IIC_VLD3lnu]>;
+
+// Vector stores use similar resources to vector loads, so use the
+// same write types. The address write must be first for stores with
+// address writeback.
+def :ItinRW<[A9WriteAdr1, A9WriteLfp1], [IIC_VST1, IIC_VST1u, + IIC_VST1x2, IIC_VST1x2u, + IIC_VST1ln, IIC_VST1lnu, + IIC_VST2, IIC_VST2u, + IIC_VST2x2, IIC_VST2x2u, + IIC_VST2ln, IIC_VST2lnu]>; +def :ItinRW<[A9WriteAdr2, A9WriteLfp2], [IIC_VST1x3, IIC_VST1x3u, + IIC_VST1x4, IIC_VST1x4u, + IIC_VST3, IIC_VST3u, + IIC_VST3ln, IIC_VST3lnu, + IIC_VST4, IIC_VST4u, + IIC_VST4ln, IIC_VST4lnu]>; + +// NEON moves. +def :ItinRW<[A9WriteV2], [IIC_VMOVSI, IIC_VMOVDI, IIC_VMOVD, IIC_VMOVQ]>; +def :ItinRW<[A9WriteV1], [IIC_VMOV, IIC_VMOVIS, IIC_VMOVID]>; +def :ItinRW<[A9WriteV3], [IIC_VMOVISL, IIC_VMOVN]>; + +// NEON integer arithmetic +// +// VADD/VAND/VORR/VEOR/VBIC/VORN/VBIT/VBIF/VBSL +def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VBINiD, IIC_VBINiQ]>; +// VSUB/VMVN/VCLSD/VCLZD/VCNTD +def :ItinRW<[A9WriteV3, A9Read2], [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>; +// VADDL/VSUBL/VNEG are mapped later under IIC_SHLi. +// ... +// VHADD/VRHADD/VQADD/VTST/VADH/VRADH +def :ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>; +// VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL +def :ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>; +// VQNEG/VQABS +def :ItinRW<[A9WriteV4], [IIC_VQUNAiD, IIC_VQUNAiQ]>; +// VABS +def :ItinRW<[A9WriteV4, A9Read2], [IIC_VUNAiD, IIC_VUNAiQ]>; +// VPADD/VPADDL are mapped later under IIC_SHLi. +// ... +// VCLSQ/VCLZQ/VCNTQ, takes two cycles. +def :ItinRW<[A9Write2V4, A9Read3], [IIC_VCNTiQ]>; +// VMOVimm/VMVNimm/VORRimm/VBICimm +def :ItinRW<[A9WriteV3], [IIC_VMOVImm]>; +def :ItinRW<[A9WriteV6, A9Read3, A9Read2], [IIC_VABAD, IIC_VABAQ]>; +def :ItinRW<[A9WriteV6, A9Read3], [IIC_VPALiD, IIC_VPALiQ]>; + +// NEON integer multiply +// +// Note: these don't quite match the timing docs, but they do match +// the original A9 itinerary. +def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VMULi16D]>; +def :ItinRW<[A9WriteV7, A9Read2, A9Read2], [IIC_VMULi16Q]>; +def :ItinRW<[A9Write2V7, A9Read2], [IIC_VMULi32D]>; +def :ItinRW<[A9Write2V9, A9Read2], [IIC_VMULi32Q]>; +def :ItinRW<[A9WriteV6, A9Read3, A9Read2, A9Read2], [IIC_VMACi16D]>; +def :ItinRW<[A9WriteV7, A9Read3, A9Read2, A9Read2], [IIC_VMACi16Q]>; +def :ItinRW<[A9Write2V7, A9Read3, A9Read2], [IIC_VMACi32D]>; +def :ItinRW<[A9Write2V9, A9Read3, A9Read2], [IIC_VMACi32Q]>; + +// NEON integer shift +// TODO: Q,Q,Q shifts should actually reserve FP for 2 cycles. 
+def :ItinRW<[A9WriteV3], [IIC_VSHLiD, IIC_VSHLiQ]>;
+def :ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>;
+
+// NEON permute
+def :ItinRW<[A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>;
+def :ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2],
+            [IIC_VPERMQ3, IIC_VEXTQ]>;
+def :ItinRW<[A9WriteV3, A9Read2], [IIC_VTB1]>;
+def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VTB2]>;
+def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3], [IIC_VTB3]>;
+def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3, A9Read3], [IIC_VTB4]>;
+def :ItinRW<[A9WriteV3, ReadDefault, A9Read2], [IIC_VTBX1]>;
+def :ItinRW<[A9WriteV3, ReadDefault, A9Read2, A9Read2], [IIC_VTBX2]>;
+def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3], [IIC_VTBX3]>;
+def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3, A9Read3],
+            [IIC_VTBX4]>;
+// NEON floating-point
+def :ItinRW<[A9WriteV5, A9Read2, A9Read2], [IIC_VBIND]>;
+def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VBINQ]>;
+def :ItinRW<[A9WriteV5, A9Read2], [IIC_VUNAD, IIC_VFMULD]>;
+def :ItinRW<[A9WriteV6, A9Read2], [IIC_VUNAQ, IIC_VFMULQ]>;
+def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
+def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
+def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>;
+def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;
+} // SchedModel = CortexA9Model
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
new file mode 100644
index 0000000..e9bc3e0
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
@@ -0,0 +1,1085 @@
+//=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Swift processor.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// This section contains legacy support for itineraries. This is
+// required until SD and PostRA schedulers are replaced by MachineScheduler.
+
+def SW_DIS0 : FuncUnit;
+def SW_DIS1 : FuncUnit;
+def SW_DIS2 : FuncUnit;
+
+def SW_ALU0 : FuncUnit;
+def SW_ALU1 : FuncUnit;
+def SW_LS   : FuncUnit;
+def SW_IDIV : FuncUnit;
+def SW_FDIV : FuncUnit;
+
+// FIXME: Need bypasses.
+// FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and
+//        IIC_iMOVix2ld better.
+// FIXME: Model the special immediate shifts that are not microcoded.
+// FIXME: Do we need to model the fact that uses of r15 in a micro-op force it
+//        to issue on pipe 1?
+// FIXME: Model the pipelined behavior of CMP / TST instructions.
+// FIXME: Better model the microcode stages of multiply instructions,
+//        especially conditional variants.
+// FIXME: Add preload instruction when it is documented.
+// FIXME: Model non-pipelined nature of FP div / sqrt unit.
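// Aside on reading the entries below (not from the patch): each
// InstrStage<C, [Units], K> occupies one of the listed units for C cycles,
// and K is the cycle offset at which the next stage may start (K = 0 lets
// a dispatch slot and the ALU stage begin in the same cycle). The trailing
// integer list gives per-operand cycles, defs first, then uses; e.g.
// [3, 1, 1] means the result is ready at cycle 3 and both sources are
// read at cycle 1.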
+ +def SwiftItineraries : ProcessorItineraries< + [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [ + // + // Move instructions, unconditional + InstrItinData<IIC_iMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2]>, + InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [3]>, + InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_LS]>], + [5]>, + // + // MVN instructions + InstrItinData<IIC_iMVNi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMVNr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMVNsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMVNsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + // + // No operand cycles + InstrItinData<IIC_iALUx , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>]>, + // + // Binary Instructions that produce a result + InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1]>, + // + // Bitwise Instructions that produce a result + InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1]>, + // + // Unary Instructions that produce a result + + // CLZ, RBIT, etc. 
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + + // BFC, BFI, UBFX, SBFX + InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1]>, + + // + // Zero and sign extension instructions + InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1, 1]>, + // + // Compare instructions + InstrItinData<IIC_iCMPi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iCMPsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iCMPsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + // + // Test instructions + InstrItinData<IIC_iTSTi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iTSTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iTSTsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iTSTsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + // + // Move instructions, conditional + // FIXME: Correctly model the extra input dep on the destination. 
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2]>, + + // Integer multiply pipeline + // + InstrItinData<IIC_iMUL16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [3, 1, 1]>, + InstrItinData<IIC_iMAC16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [3, 1, 1, 1]>, + InstrItinData<IIC_iMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + InstrItinData<IIC_iMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1, 1]>, + InstrItinData<IIC_iMUL64 , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_ALU0], 3>, + InstrStage<1, [SW_ALU0]>], + [5, 5, 1, 1]>, + InstrItinData<IIC_iMAC64 , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [5, 6, 1, 1]>, + // + // Integer divide + InstrItinData<IIC_iDIV , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0], 0>, + InstrStage<14, [SW_IDIV]>], + [14, 1, 1]>, + + // Integer load pipeline + // FIXME: The timings are some rough approximations + // + // Immediate offset + InstrItinData<IIC_iLoad_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1]>, + InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1]>, + InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_LS]>], + [3, 4, 1]>, + // + // Register offset + InstrItinData<IIC_iLoad_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [3, 4, 1, 1]>, + // + // Scaled register offset + InstrItinData<IIC_iLoad_si , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS]>], + [5, 1, 1]>, + InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS]>], + [5, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iLoad_iu , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + 
InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + // + // Register offset with update + InstrItinData<IIC_iLoad_ru , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1, 1]>, + InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1, 1]>, + InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [3, 4, 1, 1]>, + // + // Scaled register offset with update + InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [5, 3, 1, 1]>, + InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [5, 3, 1, 1]>, + // + // Load multiple, def is the 5th operand. + // FIXME: This assumes 3 to 4 registers. + InstrItinData<IIC_iLoad_m , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1, 3], [], -1>, // dynamic uops + + // + // Load multiple + update, defs are the 1st and 5th operands. + InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1, 3], [], -1>, // dynamic uops + // + // Load multiple plus branch + InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1, 3], [], -1>, // dynamic uops + // + // Pop, def is the 3rd operand. + InstrItinData<IIC_iPop , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 3], [], -1>, // dynamic uops + // + // Pop + branch, def is the 3rd operand. + InstrItinData<IIC_iPop_Br, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 3], [], -1>, // dynamic uops + + // + // iLoadi + iALUr for t2LDRpci_pic. 
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [4, 1]>, + + // Integer store pipeline + /// + // Immediate offset + InstrItinData<IIC_iStore_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + // + // Register offset + InstrItinData<IIC_iStore_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + // + // Scaled register offset + InstrItinData<IIC_iStore_si , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iStore_iu , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + // + // Register offset with update + InstrItinData<IIC_iStore_ru , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1]>, + InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1]>, + InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1]>, + // + // Scaled register offset with update + InstrItinData<IIC_iStore_siu, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>], + [3, 1, 1, 1]>, + InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>], + [3, 1, 1, 1]>, + // + // Store multiple + InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [], [], -1>, // dynamic uops + // + // Store multiple + update + InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + 
InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [2], [], -1>, // dynamic uops + + // + // Preload + InstrItinData<IIC_Preload, [InstrStage<1, [SW_DIS0], 0>], [1, 1]>, + + // Branch + // + // no delay slots, so the latency of a branch is unimportant + InstrItinData<IIC_Br , [InstrStage<1, [SW_DIS0], 0>]>, + + // FP Special Register to Integer Register File Move + InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + // + // Single-precision FP Unary + // + // Most floating-point moves get issued on ALU0. + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + // + // Double-precision FP Unary + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + + // + // Single-precision FP Compare + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [1, 1]>, + // + // Double-precision FP Compare + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [1, 1]>, + // + // Single to Double FP Convert + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Double to Single FP Convert + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + + // + // Single to Half FP Convert + InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU1], 4>, + InstrStage<1, [SW_ALU1]>], + [6, 1]>, + // + // Half to Single FP Convert + InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + + // + // Single-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Double-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Integer to Single-Precision FP Convert + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Integer to Double-Precision FP Convert + InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Single-precision FP ALU + InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-precision FP ALU + InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Single-precision FP Multiply + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Double-precision FP Multiply + InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [6, 1, 1]>, + // + // Single-precision FP MAC + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Double-precision FP MAC + InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, 
+ InstrStage<1, [SW_ALU1]>], + [12, 1, 1]>, + // + // Single-precision Fused FP MAC + InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Double-precision Fused FP MAC + InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [12, 1, 1]>, + // + // Single-precision FP DIV + InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<15, [SW_FDIV]>], + [17, 1, 1]>, + // + // Double-precision FP DIV + InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<30, [SW_FDIV]>], + [32, 1, 1]>, + // + // Single-precision FP SQRT + InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<15, [SW_FDIV]>], + [17, 1]>, + // + // Double-precision FP SQRT + InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<30, [SW_FDIV]>], + [32, 1, 1]>, + + // + // Integer to Single-precision Move + InstrItinData<IIC_fpMOVIS, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0]>], + [6, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_fpMOVID, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [4, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_fpMOVSI, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_fpMOVDI, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_LS]>], + [3, 4, 1]>, + // + // Single-precision FP Load + InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [4, 1]>, + // + // Double-precision FP Load + InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [4, 1]>, + // + // FP Load Multiple + // FIXME: Assumes a single Q register. + InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 4], [], -1>, // dynamic uops + // + // FP Load Multiple + update + // FIXME: Assumes a single Q register. + InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1, 4], [], -1>, // dynamic uops + // + // Single-precision FP Store + InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + // + // Double-precision FP Store + InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + // + // FP Store Multiple + // FIXME: Assumes a single Q register. + InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1, 1], [], -1>, // dynamic uops + // + // FP Store Multiple + update + // FIXME: Assumes a single Q register. 
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1], [], -1>, // dynamic uops + // NEON + // + // Double-register Integer Unary + InstrItinData<IIC_VUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Quad-register Integer Unary + InstrItinData<IIC_VUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Double-register Integer Q-Unary + InstrItinData<IIC_VQUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Quad-register Integer CountQ-Unary + InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Double-register Integer Binary + InstrItinData<IIC_VBINiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Binary + InstrItinData<IIC_VBINiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Integer Subtract + InstrItinData<IIC_VSUBiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Subtract + InstrItinData<IIC_VSUBiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Integer Shift + InstrItinData<IIC_VSHLiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Shift + InstrItinData<IIC_VSHLiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Double-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Double-register Integer Subtract (4 cycle) + InstrItinData<IIC_VSUBi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer Subtract (4 cycle) + InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + + // + // Double-register Integer Count + InstrItinData<IIC_VCNTiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Count + InstrItinData<IIC_VCNTiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1, 1]>, + // + // Quad-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1, 1]>, + // + // Double-register Integer Pair Add Long + 
InstrItinData<IIC_VPALiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer Pair Add Long + InstrItinData<IIC_VPALiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + + // + // Double-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Quad-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + + // + // Double-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Quad-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Double-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + // + // Double-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + // + // Quad-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + // + // Quad-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + + // + // Move + InstrItinData<IIC_VMOV, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + // + // Move Immediate + InstrItinData<IIC_VMOVImm, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2]>, + // + // Double-register Permute Move + InstrItinData<IIC_VMOVD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [2, 1]>, + // + // Quad-register Permute Move + InstrItinData<IIC_VMOVQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [2, 1]>, + // + // Integer to Single-precision Move + InstrItinData<IIC_VMOVIS , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0]>], + [6, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_VMOVID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [4, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_VMOVSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_VMOVDI , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_LS]>], + [3, 4, 1]>, + // + // Integer to Lane Move + // FIXME: I think this is correct, but it is not clear from the tuning guide. 
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_LS], 4>,
+ InstrStage<1, [SW_ALU0]>],
+ [6, 1]>,
+
+ //
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1]>,
+ //
+ // Double-register FP Unary
+ // FIXME: VRECPE / VRSQRTE have a longer latency than VABS, which is used here,
+ // and they issue on a different pipeline.
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Quad-register FP Unary
+ // FIXME: VRECPE / VRSQRTE have a longer latency than VABS, which is used here,
+ // and they issue on a different pipeline.
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [2, 1]>,
+ //
+ // Double-register FP Binary
+ // FIXME: We're using this itin for many instructions.
+ InstrItinData<IIC_VBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+
+ //
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register FP Binary
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU0]>],
+ [4, 1, 1]>,
+ //
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 1]>,
+ //
+ // Double-register FP Multiply-Accumulate
+ InstrItinData<IIC_VMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Quad-register FP Multiply-Accumulate
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-register Fused FP Multiply-Accumulate
+ InstrItinData<IIC_VFMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Quad-register Fused FP Multiply-Accumulate
+ InstrItinData<IIC_VFMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-register Reciprocal Step
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Quad-register Reciprocal Step
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 1]>,
+ //
+ // Double-register Permute
+ // FIXME: The latencies are unclear from the documentation.
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [3, 4, 3, 4]>,
+ //
+ // Quad-register Permute
+ // FIXME: The latencies are unclear from the documentation.
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [3, 4, 3, 4]>,
+ //
+ // Quad-register Permute (3 cycle issue on A9)
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [3, 4, 3, 4]>,
+
+ //
+ // Double-register VEXT
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ //
+ // Quad-register VEXT
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ //
+ // VTB
+ InstrItinData<IIC_VTB1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 3, 3]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1, 3, 5, 5]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 3, 5, 7, 7]>,
+ //
+ // VTBX
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [4, 1, 3, 3]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [6, 1, 3, 5, 5]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [SW_DIS0], 0>,
+ InstrStage<1, [SW_DIS1], 0>,
+ InstrStage<1, [SW_DIS2], 0>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1], 2>,
+ InstrStage<1, [SW_ALU1]>],
+ [8, 1, 3, 5, 7, 7]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simple machine model which
+// will replace itineraries.
+
+// Swift machine model for scheduling and other instruction cost heuristics.
+def SwiftModel : SchedMachineModel {
+ let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
+ let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
+ let LoadLatency = 3;
+
+ let Itineraries = SwiftItineraries;
+}
+
+// TODO: Add Swift processor and scheduler resources.
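
SwiftModel above supplies only coarse machine parameters (IssueWidth, MinLatency, LoadLatency) and points back at SwiftItineraries for per-instruction detail; the closing TODO refers to the new per-operand machine model. A hypothetical sketch of that direction, assuming resource names that are illustrative and not defined anywhere in this patch:

  // Hypothetical sketch only; the patch defines no ProcResource or
  // SchedWriteRes entries for Swift.
  let SchedModel = SwiftModel in {
    def SwiftUnitALU : ProcResource<2>; // the two integer ALUs
    def SwiftUnitLS  : ProcResource<1>; // the load/store unit

    // A simple load: one LS micro-op whose latency matches LoadLatency.
    def SwiftWriteLd : SchedWriteRes<[SwiftUnitLS]> { let Latency = 3; }
  }
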
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 31d5d38..b33b3c9 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -155,7 +155,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, TargetLowering::ArgListEntry Entry; // First argument: data pointer - Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*DAG.getContext()); Entry.Node = Dst; Entry.Ty = IntPtrTy; Args.push_back(Entry); diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp index 4762854..bcc9db4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -13,8 +13,9 @@ #include "ARMSubtarget.h" #include "ARMBaseRegisterInfo.h" +#include "ARMBaseInstrInfo.h" #include "llvm/GlobalValue.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" #define GET_SUBTARGETINFO_TARGET_DESC @@ -31,6 +32,10 @@ static cl::opt<bool> DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden); static cl::opt<bool> +UseFusedMulOps("arm-use-mulops", + cl::init(true), cl::Hidden); + +static cl::opt<bool> StrictAlign("arm-strict-align", cl::Hidden, cl::desc("Disallow all unaligned memory accesses")); @@ -49,6 +54,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , HasVFPv4(false) , HasNEON(false) , UseNEONForSinglePrecisionFP(false) + , UseMulOps(UseFusedMulOps) , SlowFPVMLx(false) , HasVMLxForwarding(false) , SlowFPBrcc(false) @@ -63,6 +69,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , HasFP16(false) , HasD16(false) , HasHardwareDivide(false) + , HasHardwareDivideInARM(false) , HasT2ExtractPack(false) , HasDataBarrier(false) , Pref32BitThumb(false) diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h index b394061..8e6b650 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h @@ -30,7 +30,7 @@ class StringRef; class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { - Others, CortexA8, CortexA9 + Others, CortexA8, CortexA9, CortexA15, Swift }; /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. @@ -57,6 +57,10 @@ protected: /// determine if NEON should actually be used. bool UseNEONForSinglePrecisionFP; + /// UseMulOps - True if non-microcoded fused integer multiply-add and + /// multiply-subtract instructions should be used. + bool UseMulOps; + /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates /// whether the FP VML[AS] instructions are slow (if so, don't use them). bool SlowFPVMLx; @@ -107,6 +111,9 @@ protected: /// HasHardwareDivide - True if subtarget supports [su]div bool HasHardwareDivide; + /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode + bool HasHardwareDivideInARM; + /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack /// instructions. 
bool HasT2ExtractPack; @@ -199,7 +206,10 @@ protected: bool isCortexA8() const { return ARMProcFamily == CortexA8; } bool isCortexA9() const { return ARMProcFamily == CortexA9; } + bool isCortexA15() const { return ARMProcFamily == CortexA15; } + bool isSwift() const { return ARMProcFamily == Swift; } bool isCortexM3() const { return CPUString == "cortex-m3"; } + bool isLikeA9() const { return isCortexA9() || isCortexA15(); } bool hasARMOps() const { return !NoARM; } @@ -211,8 +221,10 @@ protected: return hasNEON() && UseNEONForSinglePrecisionFP; } bool hasDivide() const { return HasHardwareDivide; } + bool hasDivideInARMMode() const { return HasHardwareDivideInARM; } bool hasT2ExtractPack() const { return HasT2ExtractPack; } bool hasDataBarrier() const { return HasDataBarrier; } + bool useMulOps() const { return UseMulOps; } bool useFPVMLx() const { return !SlowFPVMLx; } bool hasVMLxForwarding() const { return HasVMLxForwarding; } bool isFPBrccSlow() const { return SlowFPBrcc; } diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp index 171c9ad..b486d4f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -60,7 +60,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL) : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), InstrInfo(Subtarget), - DataLayout(Subtarget.isAPCS_ABI() ? + DL(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:64-i64:32:64-" "v128:32:128-v64:32:64-n32-S32") : Subtarget.isAAPCS_ABI() ? @@ -68,10 +68,10 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, "v128:64:128-v64:64:64-n32-S64") : std::string("e-p:32:32-f64:64:64-i64:64:64-" "v128:64:128-v64:64:64-n32-S32")), - ELFWriterInfo(*this), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget) { + FrameLowering(Subtarget), + STTI(&TLInfo), VTTI(&TLInfo) { if (!Subtarget.hasARMOps()) report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " "support ARM mode execution!"); @@ -88,7 +88,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, InstrInfo(Subtarget.hasThumb2() ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget)) : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), - DataLayout(Subtarget.isAPCS_ABI() ? + DL(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:64-i64:32:64-" "i16:16:32-i8:8:32-i1:8:32-" "v128:32:128-v64:32:64-a:0:32-n32-S32") : @@ -99,12 +99,12 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, std::string("e-p:32:32-f64:64:64-i64:64:64-" "i16:16:32-i8:8:32-i1:8:32-" "v128:64:128-v64:64:64-a:0:32-n32-S32")), - ELFWriterInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget.hasThumb2() ? new ARMFrameLowering(Subtarget) - : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) { + : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)), + STTI(&TLInfo), VTTI(&TLInfo) { } namespace { @@ -143,6 +143,11 @@ bool ARMPassConfig::addPreISel() { bool ARMPassConfig::addInstSelector() { addPass(createARMISelDag(getARMTargetMachine(), getOptLevel())); + + const ARMSubtarget *Subtarget = &getARMSubtarget(); + if (Subtarget->isTargetELF() && !Subtarget->isThumb1Only() && + TM->Options.EnableFastISel) + addPass(createARMGlobalBaseRegPass()); return false; } @@ -150,7 +155,7 @@ bool ARMPassConfig::addPreRegAlloc() { // FIXME: temporarily disabling load / store optimization pass for Thumb1. 
if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only()) addPass(createARMLoadStoreOptimizationPass(true)); - if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9()) + if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9()) addPass(createMLxExpansionPass()); return true; } diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h index abcdb24..ebdd5b4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h @@ -15,7 +15,6 @@ #define ARMTARGETMACHINE_H #include "ARMInstrInfo.h" -#include "ARMELFWriterInfo.h" #include "ARMFrameLowering.h" #include "ARMJITInfo.h" #include "ARMSubtarget.h" @@ -25,7 +24,8 @@ #include "Thumb1FrameLowering.h" #include "Thumb2InstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetTransformImpl.h" +#include "llvm/DataLayout.h" #include "llvm/MC/MCStreamer.h" #include "llvm/ADT/OwningPtr.h" @@ -62,11 +62,12 @@ public: class ARMTargetMachine : public ARMBaseTargetMachine { virtual void anchor(); ARMInstrInfo InstrInfo; - const TargetData DataLayout; // Calculates type size & alignment - ARMELFWriterInfo ELFWriterInfo; + const DataLayout DL; // Calculates type size & alignment ARMTargetLowering TLInfo; ARMSelectionDAGInfo TSInfo; ARMFrameLowering FrameLowering; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -88,12 +89,14 @@ class ARMTargetMachine : public ARMBaseTargetMachine { virtual const ARMFrameLowering *getFrameLowering() const { return &FrameLowering; } - - virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const TargetData *getTargetData() const { return &DataLayout; } - virtual const ARMELFWriterInfo *getELFWriterInfo() const { - return Subtarget.isTargetELF() ? &ELFWriterInfo : 0; + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; } + virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const DataLayout *getDataLayout() const { return &DL; } }; /// ThumbTargetMachine - Thumb target machine. @@ -104,12 +107,13 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { virtual void anchor(); // Either Thumb1InstrInfo or Thumb2InstrInfo. OwningPtr<ARMBaseInstrInfo> InstrInfo; - const TargetData DataLayout; // Calculates type size & alignment - ARMELFWriterInfo ELFWriterInfo; + const DataLayout DL; // Calculates type size & alignment ARMTargetLowering TLInfo; ARMSelectionDAGInfo TSInfo; // Either Thumb1FrameLowering or ARMFrameLowering. OwningPtr<ARMFrameLowering> FrameLowering; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -138,10 +142,13 @@ public: virtual const ARMFrameLowering *getFrameLowering() const { return FrameLowering.get(); } - virtual const TargetData *getTargetData() const { return &DataLayout; } - virtual const ARMELFWriterInfo *getELFWriterInfo() const { - return Subtarget.isTargetELF() ? 
&ELFWriterInfo : 0; + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; } + virtual const DataLayout *getDataLayout() const { return &DL; } }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 3a5957b..c61e3bd 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -181,49 +181,44 @@ class ARMAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index); // Asm Match Converter Methods - bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, - const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtT2StrdPre(MCInst &Inst, unsigned Opcode, - const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, + void cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); + void cvtT2StrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); + void cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, + void cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + void cvtLdWriteBackRegAddrMode2(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, + void cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, + void cvtStWriteBackRegAddrModeImm12(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + void cvtStWriteBackRegAddrMode2(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + void cvtStWriteBackRegAddrMode3(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode, + void cvtLdExtTWriteBackImm(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode, + void cvtLdExtTWriteBackReg(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode, + void cvtStExtTWriteBackImm(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode, + void cvtStExtTWriteBackReg(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdrdPre(MCInst &Inst, unsigned Opcode, - const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStrdPre(MCInst &Inst, unsigned Opcode, - const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + void cvtLdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); + void cvtStrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); + void cvtLdWriteBackRegAddrMode3(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtThumbMultiply(MCInst &Inst, unsigned Opcode, + void cvtThumbMultiply(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool 
cvtVLDwbFixed(MCInst &Inst, unsigned Opcode, + void cvtVLDwbFixed(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtVLDwbRegister(MCInst &Inst, unsigned Opcode, + void cvtVLDwbRegister(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtVSTwbFixed(MCInst &Inst, unsigned Opcode, + void cvtVSTwbFixed(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtVSTwbRegister(MCInst &Inst, unsigned Opcode, + void cvtVSTwbRegister(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool validateInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Ops); bool processInstruction(MCInst &Inst, @@ -258,15 +253,17 @@ public: // Implementation of the MCTargetAsmParser interface: bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); - bool ParseInstruction(StringRef Name, SMLoc NameLoc, + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); bool ParseDirective(AsmToken DirectiveID); unsigned checkTargetMatchPredicate(MCInst &Inst); - bool MatchAndEmitInstruction(SMLoc IDLoc, + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out); + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm); }; } // end anonymous namespace @@ -486,7 +483,8 @@ public: SMLoc getStartLoc() const { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const { return EndLoc; } - + /// getLocRange - Get the range between the first and last token of this + /// operand. SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } ARMCC::CondCodes getCondCode() const { @@ -862,7 +860,7 @@ public: bool isSPRRegList() const { return Kind == k_SPRRegisterList; } bool isToken() const { return Kind == k_Token; } bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; } - bool isMemory() const { return Kind == k_Memory; } + bool isMem() const { return Kind == k_Memory; } bool isShifterImm() const { return Kind == k_ShifterImmediate; } bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; } bool isRegShiftedImm() const { return Kind == k_ShiftedImmediate; } @@ -873,14 +871,14 @@ public: return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy ==ARM_AM::no_shift; } bool isMemNoOffset(bool alignOK = false) const { - if (!isMemory()) + if (!isMem()) return false; // No offset of any kind. return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 && (alignOK || Memory.Alignment == 0); } bool isMemPCRelImm12() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Base register must be PC. if (Memory.BaseRegNum != ARM::PC) @@ -894,7 +892,7 @@ public: return isMemNoOffset(true); } bool isAddrMode2() const { - if (!isMemory() || Memory.Alignment != 0) return false; + if (!isMem() || Memory.Alignment != 0) return false; // Check for register offset. if (Memory.OffsetRegNum) return true; // Immediate offset in range [-4095, 4095]. @@ -916,7 +914,7 @@ public: // and we reject it. if (isImm() && !isa<MCConstantExpr>(getImm())) return true; - if (!isMemory() || Memory.Alignment != 0) return false; + if (!isMem() || Memory.Alignment != 0) return false; // No shifts are legal for AM3. if (Memory.ShiftType != ARM_AM::no_shift) return false; // Check for register offset. @@ -946,7 +944,7 @@ public: // and we reject it. 
if (isImm() && !isa<MCConstantExpr>(getImm())) return true; - if (!isMemory() || Memory.Alignment != 0) return false; + if (!isMem() || Memory.Alignment != 0) return false; // Check for register offset. if (Memory.OffsetRegNum) return false; // Immediate offset in range [-1020, 1020] and a multiple of 4. @@ -956,25 +954,25 @@ public: Val == INT32_MIN; } bool isMemTBB() const { - if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative || + if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative || Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0) return false; return true; } bool isMemTBH() const { - if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative || + if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative || Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm != 1 || Memory.Alignment != 0 ) return false; return true; } bool isMemRegOffset() const { - if (!isMemory() || !Memory.OffsetRegNum || Memory.Alignment != 0) + if (!isMem() || !Memory.OffsetRegNum || Memory.Alignment != 0) return false; return true; } bool isT2MemRegOffset() const { - if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative || + if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative || Memory.Alignment != 0) return false; // Only lsl #{0, 1, 2, 3} allowed. @@ -987,14 +985,14 @@ public: bool isMemThumbRR() const { // Thumb reg+reg addressing is simple. Just two registers, a base and // an offset. No shifts, negations or any other complicating factors. - if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative || + if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative || Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0) return false; return isARMLowRegister(Memory.BaseRegNum) && (!Memory.OffsetRegNum || isARMLowRegister(Memory.OffsetRegNum)); } bool isMemThumbRIs4() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || + if (!isMem() || Memory.OffsetRegNum != 0 || !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0) return false; // Immediate offset, multiple of 4 in range [0, 124]. @@ -1003,7 +1001,7 @@ public: return Val >= 0 && Val <= 124 && (Val % 4) == 0; } bool isMemThumbRIs2() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || + if (!isMem() || Memory.OffsetRegNum != 0 || !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0) return false; // Immediate offset, multiple of 4 in range [0, 62]. @@ -1012,7 +1010,7 @@ public: return Val >= 0 && Val <= 62 && (Val % 2) == 0; } bool isMemThumbRIs1() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || + if (!isMem() || Memory.OffsetRegNum != 0 || !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0) return false; // Immediate offset in range [0, 31]. @@ -1021,7 +1019,7 @@ public: return Val >= 0 && Val <= 31; } bool isMemThumbSPI() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || + if (!isMem() || Memory.OffsetRegNum != 0 || Memory.BaseRegNum != ARM::SP || Memory.Alignment != 0) return false; // Immediate offset, multiple of 4 in range [0, 1020]. @@ -1035,7 +1033,7 @@ public: // and we reject it. if (isImm() && !isa<MCConstantExpr>(getImm())) return true; - if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Immediate offset a multiple of 4 in range [-1020, 1020]. 
if (!Memory.OffsetImm) return true; @@ -1044,7 +1042,7 @@ public: return (Val >= -1020 && Val <= 1020 && (Val & 3) == 0) || Val == INT32_MIN; } bool isMemImm0_1020s4Offset() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Immediate offset a multiple of 4 in range [0, 1020]. if (!Memory.OffsetImm) return true; @@ -1052,7 +1050,7 @@ public: return Val >= 0 && Val <= 1020 && (Val & 3) == 0; } bool isMemImm8Offset() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Base reg of PC isn't allowed for these encodings. if (Memory.BaseRegNum == ARM::PC) return false; @@ -1062,7 +1060,7 @@ public: return (Val == INT32_MIN) || (Val > -256 && Val < 256); } bool isMemPosImm8Offset() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Immediate offset in range [0, 255]. if (!Memory.OffsetImm) return true; @@ -1070,7 +1068,7 @@ public: return Val >= 0 && Val < 256; } bool isMemNegImm8Offset() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Base reg of PC isn't allowed for these encodings. if (Memory.BaseRegNum == ARM::PC) return false; @@ -1080,7 +1078,7 @@ public: return (Val == INT32_MIN) || (Val > -256 && Val < 0); } bool isMemUImm12Offset() const { - if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Immediate offset in range [0, 4095]. if (!Memory.OffsetImm) return true; @@ -1094,7 +1092,7 @@ public: if (isImm() && !isa<MCConstantExpr>(getImm())) return true; - if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Immediate offset in range [-4095, 4095]. if (!Memory.OffsetImm) return true; @@ -3376,7 +3374,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + if (!Tok.is(AsmToken::Identifier)) + return MatchOperand_NoMatch; StringRef Mask = Tok.getString(); if (isMClass()) { @@ -3880,8 +3879,8 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// cvtT2LdrdPre - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Rt, Rt2 ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -3892,14 +3891,13 @@ cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtT2StrdPre - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. 
-bool ARMAsmParser:: -cvtT2StrdPre(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtT2StrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateReg(0)); @@ -3910,14 +3908,13 @@ cvtT2StrdPre(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -3926,28 +3923,26 @@ cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdWriteBackRegAddrMode2(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -3956,14 +3951,13 @@ cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -3972,57 +3966,53 @@ cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. 
-bool ARMAsmParser:: -cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStWriteBackRegAddrModeImm12(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStWriteBackRegAddrMode2(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStWriteBackRegAddrMode3(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdExtTWriteBackImm - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdExtTWriteBackImm(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Rt ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -4034,14 +4024,13 @@ cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdExtTWriteBackReg - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdExtTWriteBackReg(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Rt ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -4053,14 +4042,13 @@ cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStExtTWriteBackImm - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. 
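// Illustrative sketch (editor's note, separate from the patch; the packing
// helpers are invented). The addPostIdxImm8Operands calls above hand the
// matcher one packed value: an 8-bit magnitude with an add/sub flag in bit
// 8, which is exactly what printPostIdxImm8Operand later in this commit
// unpacks via (Imm & 256) and (Imm & 0xff). A round trip of that packing:
#include <cassert>
#include <cstdlib>
static unsigned packPostIdxImm8(int Offset) {
  unsigned IsAdd = Offset >= 0;              // bit 8: 1 = add, 0 = subtract
  return (IsAdd << 8) | (unsigned)std::abs(Offset);
}
static int unpackPostIdxImm8(unsigned V) {
  int Mag = (int)(V & 0xff);
  return (V & 0x100) ? Mag : -Mag;
}
int main() {
  assert(unpackPostIdxImm8(packPostIdxImm8(-12)) == -12);
  assert(unpackPostIdxImm8(packPostIdxImm8(255)) == 255);
}
// End of editor's sketch.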
-bool ARMAsmParser:: -cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStExtTWriteBackImm(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); @@ -4072,14 +4060,13 @@ cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStExtTWriteBackReg - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStExtTWriteBackReg(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); @@ -4091,14 +4078,13 @@ cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdrdPre - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdrdPre(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Rt, Rt2 ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -4109,14 +4095,13 @@ cvtLdrdPre(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStrdPre - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStrdPre(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); @@ -4127,40 +4112,27 @@ cvtStrdPre(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdWriteBackRegAddrMode3(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } -/// cvtThumbMultiple- Convert parsed operands to MCInst. +/// cvtThumbMultiply - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. 
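// Illustrative sketch (editor's note, separate from the patch). The
// cvtLdrdPre/cvtStrdPre converters above feed a register pair Rt, Rt2 plus
// an addrmode3 address. In ARM mode the architecture constrains the pair:
// Rt must be even and Rt2 must be Rt+1. That rule on plain register
// numbers, independent of LLVM's register enums:
#include <cassert>
static bool isValidLdrdRegPair(unsigned Rt, unsigned Rt2) {
  return (Rt & 1) == 0 && Rt2 == Rt + 1;
}
int main() {
  assert(isValidLdrdRegPair(0, 1) && isValidLdrdRegPair(2, 3));
  assert(!isValidLdrdRegPair(1, 2)); // odd first register
  assert(!isValidLdrdRegPair(0, 2)); // non-consecutive pair
}
// End of editor's sketch.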
-bool ARMAsmParser:: -cvtThumbMultiply(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtThumbMultiply(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // The second source operand must be the same register as the destination - // operand. - if (Operands.size() == 6 && - (((ARMOperand*)Operands[3])->getReg() != - ((ARMOperand*)Operands[5])->getReg()) && - (((ARMOperand*)Operands[3])->getReg() != - ((ARMOperand*)Operands[4])->getReg())) { - Error(Operands[3]->getStartLoc(), - "destination register must match source register"); - return false; - } ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1); // If we have a three-operand form, make sure to set Rn to be the operand @@ -4173,12 +4145,10 @@ cvtThumbMultiply(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[RegOp])->addRegOperands(Inst, 1); Inst.addOperand(Inst.getOperand(0)); ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2); - - return true; } -bool ARMAsmParser:: -cvtVLDwbFixed(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtVLDwbFixed(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Vd ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); @@ -4188,11 +4158,10 @@ cvtVLDwbFixed(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } -bool ARMAsmParser:: -cvtVLDwbRegister(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtVLDwbRegister(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Vd ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); @@ -4204,11 +4173,10 @@ cvtVLDwbRegister(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } -bool ARMAsmParser:: -cvtVSTwbFixed(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtVSTwbFixed(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); @@ -4218,11 +4186,10 @@ cvtVSTwbFixed(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } -bool ARMAsmParser:: -cvtVSTwbRegister(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtVSTwbRegister(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); @@ -4234,7 +4201,6 @@ cvtVSTwbRegister(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// Parse an ARM memory expression, return false if successful else return true @@ -4471,6 +4437,12 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, ((St == ARM_AM::lsl || St == ARM_AM::ror) && Imm > 31) || ((St == ARM_AM::lsr || St == ARM_AM::asr) && Imm > 32)) return Error(Loc, "immediate shift value out of range"); + // If <ShiftTy> #0, turn it into a no_shift. + if (Imm == 0) + St = ARM_AM::lsl; + // For consistency, treat lsr #32 and asr #32 as having immediate value 0. 
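// Illustrative sketch (editor's note, separate from the patch; the enum is
// invented). The two fix-ups added here give every parsed shift a single
// canonical form: "<shift> #0" collapses to lsl (no shift), and the
// architectural lsr/asr #32 spellings are stored with amount 0. Condensed:
#include <cassert>
enum ShiftKind { LSL, LSR, ASR, ROR };
static void canonicalizeShift(ShiftKind &K, unsigned &Imm) {
  if (Imm == 0)
    K = LSL;   // "<ShiftTy> #0" is no shift at all
  if (Imm == 32)
    Imm = 0;   // lsr/asr #32 are encoded with a zero amount
}
int main() {
  ShiftKind K = ASR; unsigned Imm = 32;
  canonicalizeShift(K, Imm);
  assert(K == ASR && Imm == 0);  // asr #32 kept, amount normalized
  K = ROR; Imm = 0;
  canonicalizeShift(K, Imm);
  assert(K == LSL);              // ror #0 demoted to no shift
}
// End of editor's sketch.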
+ if (Imm == 32) + Imm = 0; Amount = Imm; } @@ -4648,7 +4620,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, return true; const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal, - getContext()); + getContext()); E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E)); return false; @@ -4983,7 +4955,8 @@ static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) { static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features); /// Parse an arm instruction mnemonic followed by its operands. -bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, +bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Apply mnemonic aliases before doing anything else, as the destination // mnemonic may include suffixes and we want to handle them normally. @@ -5377,6 +5350,25 @@ validateInstruction(MCInst &Inst, "in register list"); break; } + case ARM::tMUL: { + // The second source operand must be the same register as the destination + // operand. + // + // In this case, we must directly check the parsed operands because the + // cvtThumbMultiply() function is written in such a way that it guarantees + // this first statement is always true for the new Inst. Essentially, the + // destination is unconditionally copied into the second source operand + // without checking to see if it matches what we actually parsed. + if (Operands.size() == 6 && + (((ARMOperand*)Operands[3])->getReg() != + ((ARMOperand*)Operands[5])->getReg()) && + (((ARMOperand*)Operands[3])->getReg() != + ((ARMOperand*)Operands[4])->getReg())) { + return Error(Operands[3]->getStartLoc(), + "destination register must match source register"); + } + break; + } // Like for ldm/stm, push and pop have hi-reg handling versions in Thumb2, // so only issue a diagnostic for thumb1. The instructions will be // switched to the t2 encodings in processInstruction() if necessary. @@ -5678,6 +5670,20 @@ bool ARMAsmParser:: processInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { switch (Inst.getOpcode()) { + // Alias for alternate form of 'ADR Rd, #imm' instruction. + case ARM::ADDri: { + if (Inst.getOperand(1).getReg() != ARM::PC || + Inst.getOperand(5).getReg() != 0) + return false; + MCInst TmpInst; + TmpInst.setOpcode(ARM::ADR); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } // Aliases for alternate PC+imm syntax of LDR instructions.
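// Illustrative sketch (editor's note, separate from the patch; the helper
// is invented). Backing up to the tMUL validation added above: it boils
// down to one predicate. Because the 16-bit Thumb MUL ties the destination
// to a source, "muls Rd, Rn, Rm" is only encodable when Rd matches one of
// the sources (MUL commutes, so either source will do; the converter then
// swaps operands as needed):
#include <cassert>
static bool tMulDestOk(unsigned Rd, unsigned Rn, unsigned Rm) {
  return Rd == Rn || Rd == Rm;
}
int main() {
  assert(tMulDestOk(1, 1, 2));  // muls r1, r1, r2
  assert(tMulDestOk(2, 1, 2));  // muls r2, r1, r2 (commuted form)
  assert(!tMulDestOk(3, 1, 2)); // rejected: destination matches neither
}
// End of editor's sketch.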
case ARM::t2LDRpcrel: Inst.setOpcode(ARM::t2LDRpci); @@ -7471,13 +7477,14 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { static const char *getSubtargetFeatureName(unsigned Val); bool ARMAsmParser:: -MatchAndEmitInstruction(SMLoc IDLoc, +MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out) { + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { MCInst Inst; - unsigned ErrorInfo; unsigned MatchResult; - MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo); + MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, + MatchingInlineAsm); switch (MatchResult) { default: break; case Match_Success: @@ -7540,9 +7547,6 @@ MatchAndEmitInstruction(SMLoc IDLoc, case Match_MnemonicFail: return Error(IDLoc, "invalid instruction", ((ARMOperand*)Operands[0])->getLocRange()); - case Match_ConversionFail: - // The converter function will have already emitted a diagnostic. - return true; case Match_RequiresNotITBlock: return Error(IDLoc, "flag setting instruction only valid outside IT block"); case Match_RequiresITBlock: diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index c90751d..f00142d 100644 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -525,8 +525,9 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, else ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; const char *ReferenceName; - const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, - &ReferenceName); + uint64_t SymbolValue = 0x00000000ffffffffULL & Value; + const char *Name = SymbolLookUp(DisInfo, SymbolValue, &ReferenceType, + Address, &ReferenceName); if (Name) { SymbolicOp.AddSymbol.Name = Name; SymbolicOp.AddSymbol.Present = true; @@ -1523,6 +1524,8 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; } unsigned amt = fieldFromInstruction(Insn, 7, 5); + if (Opc == ARM_AM::ror && amt == 0) + Opc = ARM_AM::rrx; unsigned imm = ARM_AM::getAM2Opc(Op, amt, Opc, idx_mode); Inst.addOperand(MCOperand::CreateImm(imm)); @@ -1564,6 +1567,9 @@ static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val, break; } + if (ShOp == ARM_AM::ror && imm == 0) + ShOp = ARM_AM::rrx; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) @@ -2089,16 +2095,28 @@ static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val, static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { - DecodeStatus S = MCDisassembler::Success; - unsigned imm = (fieldFromInstruction(Insn, 0, 11) << 0) | - (fieldFromInstruction(Insn, 11, 1) << 18) | - (fieldFromInstruction(Insn, 13, 1) << 17) | - (fieldFromInstruction(Insn, 16, 6) << 11) | - (fieldFromInstruction(Insn, 26, 1) << 19); - if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<20>(imm<<1) + 4, + DecodeStatus Status = MCDisassembler::Success; + + // Note the J1 and J2 values are from the encoded instruction. 
So here + // change them to I1 and I2 values as documented: + // I1 = NOT(J1 EOR S); + // I2 = NOT(J2 EOR S); + // and build the imm32 with one trailing zero as documented: + // imm32 = SignExtend(S:I1:I2:imm10:imm11:'0', 32); + unsigned S = fieldFromInstruction(Insn, 26, 1); + unsigned J1 = fieldFromInstruction(Insn, 13, 1); + unsigned J2 = fieldFromInstruction(Insn, 11, 1); + unsigned I1 = !(J1 ^ S); + unsigned I2 = !(J2 ^ S); + unsigned imm10 = fieldFromInstruction(Insn, 16, 10); + unsigned imm11 = fieldFromInstruction(Insn, 0, 11); + unsigned tmp = (S << 23) | (I1 << 22) | (I2 << 21) | (imm10 << 11) | imm11; + int imm32 = SignExtend32<24>(tmp << 1); + if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4, true, 4, Inst, Decoder)) - Inst.addOperand(MCOperand::CreateImm(SignExtend32<20>(imm << 1))); - return S; + Inst.addOperand(MCOperand::CreateImm(imm32)); + + return Status; } static DecodeStatus @@ -2701,6 +2719,8 @@ static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn, unsigned align = fieldFromInstruction(Insn, 4, 1); unsigned size = fieldFromInstruction(Insn, 6, 2); + if (size == 0 && align == 1) + return MCDisassembler::Fail; align *= (1 << size); switch (Inst.getOpcode()) { @@ -2831,6 +2851,8 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn, unsigned align = fieldFromInstruction(Insn, 4, 1); if (size == 0x3) { + if (align == 0) + return MCDisassembler::Fail; size = 4; align = 16; } else { @@ -3170,7 +3192,7 @@ static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, int imm = Val & 0xFF; if (!(Val & 0x100)) imm *= -1; - Inst.addOperand(MCOperand::CreateImm(imm << 2)); + Inst.addOperand(MCOperand::CreateImm(imm * 4)); } return MCDisassembler::Success; @@ -3710,8 +3732,16 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, if (fieldFromInstruction(Insn, 6, 1)) return MCDisassembler::Fail; // UNDEFINED index = fieldFromInstruction(Insn, 7, 1); - if (fieldFromInstruction(Insn, 4, 2) != 0) - align = 4; + + switch (fieldFromInstruction(Insn, 4, 2)) { + case 0: + align = 0; break; + case 3: + align = 4; break; + default: + return MCDisassembler::Fail; + } + break; } if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) @@ -3769,8 +3799,16 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, if (fieldFromInstruction(Insn, 6, 1)) return MCDisassembler::Fail; // UNDEFINED index = fieldFromInstruction(Insn, 7, 1); - if (fieldFromInstruction(Insn, 4, 2) != 0) - align = 4; + + switch (fieldFromInstruction(Insn, 4, 2)) { + case 0: + align = 0; break; + case 3: + align = 4; break; + default: + return MCDisassembler::Fail; + } + break; } if (Rm != 0xF) { // Writeback @@ -4090,8 +4128,15 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, inc = 2; break; case 2: - if (fieldFromInstruction(Insn, 4, 2)) - align = 4 << fieldFromInstruction(Insn, 4, 2); + switch (fieldFromInstruction(Insn, 4, 2)) { + case 0: + align = 0; break; + case 3: + return MCDisassembler::Fail; + default: + align = 4 << fieldFromInstruction(Insn, 4, 2); break; + } + index = fieldFromInstruction(Insn, 7, 1); if (fieldFromInstruction(Insn, 6, 1)) inc = 2; @@ -4164,8 +4209,15 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, inc = 2; break; case 2: - if (fieldFromInstruction(Insn, 4, 2)) - align = 4 << fieldFromInstruction(Insn, 4, 2); + switch (fieldFromInstruction(Insn, 4, 2)) { + case 0: + align = 0; break; + case 3: + return MCDisassembler::Fail; + default: + align = 4 << fieldFromInstruction(Insn,
4, 2); break; + } + index = fieldFromInstruction(Insn, 7, 1); if (fieldFromInstruction(Insn, 6, 1)) inc = 2; diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 8b9109e..dcc41d9 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -29,11 +29,33 @@ using namespace llvm; /// /// getSORegOffset returns an integer from 0-31, representing '32' as 0. static unsigned translateShiftImm(unsigned imm) { + // lsr #32 and asr #32 exist, but should be encoded as a 0. + assert((imm & ~0x1f) == 0 && "Invalid shift encoding"); + if (imm == 0) return 32; return imm; } +/// Prints the shift value with an immediate value. +static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc, + unsigned ShImm, bool UseMarkup) { + if (ShOpc == ARM_AM::no_shift || (ShOpc == ARM_AM::lsl && !ShImm)) + return; + O << ", "; + + assert (!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0"); + O << getShiftOpcStr(ShOpc); + + if (ShOpc != ARM_AM::rrx) { + O << " "; + if (UseMarkup) + O << "<imm:"; + O << "#" << translateShiftImm(ShImm); + if (UseMarkup) + O << ">"; + } +} ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, @@ -45,7 +67,9 @@ ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, } void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - OS << getRegisterName(RegNo); + OS << markup("<reg:") + << getRegisterName(RegNo) + << markup(">"); } void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, @@ -85,10 +109,13 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, printSBitModifierOperand(MI, 6, O); printPredicateOperand(MI, 4, O); - O << '\t' << getRegisterName(Dst.getReg()) - << ", " << getRegisterName(MO1.getReg()); + O << '\t'; + printRegName(O, Dst.getReg()); + O << ", "; + printRegName(O, MO1.getReg()); - O << ", " << getRegisterName(MO2.getReg()); + O << ", "; + printRegName(O, MO2.getReg()); assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); printAnnotation(O, Annot); return; @@ -104,15 +131,20 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, printSBitModifierOperand(MI, 5, O); printPredicateOperand(MI, 3, O); - O << '\t' << getRegisterName(Dst.getReg()) - << ", " << getRegisterName(MO1.getReg()); + O << '\t'; + printRegName(O, Dst.getReg()); + O << ", "; + printRegName(O, MO1.getReg()); if (ARM_AM::getSORegShOp(MO2.getImm()) == ARM_AM::rrx) { printAnnotation(O, Annot); return; } - O << ", #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())); + O << ", " + << markup("<imm:") + << "#" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())) + << markup(">"); printAnnotation(O, Annot); return; } @@ -136,7 +168,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, MI->getOperand(3).getImm() == -4) { O << '\t' << "push"; printPredicateOperand(MI, 4, O); - O << "\t{" << getRegisterName(MI->getOperand(1).getReg()) << "}"; + O << "\t{"; + printRegName(O, MI->getOperand(1).getReg()); + O << "}"; printAnnotation(O, Annot); return; } @@ -159,7 +193,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, MI->getOperand(4).getImm() == 4) { O << '\t' << "pop"; printPredicateOperand(MI, 5, O); - O << "\t{" << getRegisterName(MI->getOperand(0).getReg()) << "}"; + O << "\t{"; + printRegName(O, MI->getOperand(0).getReg()); + O << "}"; printAnnotation(O, Annot); return; } @@ -198,7 +234,8 @@ void 
ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, O << "\tldm"; printPredicateOperand(MI, 1, O); - O << '\t' << getRegisterName(BaseReg); + O << '\t'; + printRegName(O, BaseReg); if (Writeback) O << "!"; O << ", "; printRegisterList(MI, 3, O); @@ -224,9 +261,11 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { unsigned Reg = Op.getReg(); - O << getRegisterName(Reg); + printRegName(O, Reg); } else if (Op.isImm()) { - O << '#' << Op.getImm(); + O << markup("<imm:") + << '#' << Op.getImm() + << markup(">"); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); // If a symbolic branch target was added as a constant expression then print @@ -244,13 +283,16 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } } -void ARMInstPrinter::printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); if (MO1.isExpr()) O << *MO1.getExpr(); - else if (MO1.isImm()) - O << "[pc, #" << MO1.getImm() << "]"; + else if (MO1.isImm()) { + O << markup("<mem:") << "[pc, " + << markup("<imm:") << "#" << MO1.getImm() + << markup(">]>", "]"); + } else llvm_unreachable("Unknown LDR label operand?"); } @@ -266,7 +308,7 @@ void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum, const MCOperand &MO2 = MI->getOperand(OpNum+1); const MCOperand &MO3 = MI->getOperand(OpNum+2); - O << getRegisterName(MO1.getReg()); + printRegName(O, MO1.getReg()); // Print the shift opc. ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm()); @@ -274,7 +316,8 @@ void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum, if (ShOpc == ARM_AM::rrx) return; - O << ' ' << getRegisterName(MO2.getReg()); + O << ' '; + printRegName(O, MO2.getReg()); assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); } @@ -283,14 +326,11 @@ void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum, const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); - O << getRegisterName(MO1.getReg()); + printRegName(O, MO1.getReg()); // Print the shift opc. - ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm()); - O << ", " << ARM_AM::getShiftOpcStr(ShOpc); - if (ShOpc == ARM_AM::rrx) - return; - O << " #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())); + printRegImmShift(O, ARM_AM::getSORegShOp(MO2.getImm()), + ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup); } @@ -304,67 +344,51 @@ void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, const MCOperand &MO2 = MI->getOperand(Op+1); const MCOperand &MO3 = MI->getOperand(Op+2); - O << "[" << getRegisterName(MO1.getReg()); + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); if (!MO2.getReg()) { - if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. - O << ", #" + if (ARM_AM::getAM2Offset(MO3.getImm())) { // Don't print +0. 
+ O << ", " + << markup("<imm:") + << "#" << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) - << ARM_AM::getAM2Offset(MO3.getImm()); - O << "]"; - return; - } - - O << ", " - << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) - << getRegisterName(MO2.getReg()); - - if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm())) - << " #" << ShImm; - O << "]"; -} - -void ARMInstPrinter::printAM2PostIndexOp(const MCInst *MI, unsigned Op, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); - const MCOperand &MO3 = MI->getOperand(Op+2); - - O << "[" << getRegisterName(MO1.getReg()) << "], "; - - if (!MO2.getReg()) { - unsigned ImmOffs = ARM_AM::getAM2Offset(MO3.getImm()); - O << '#' - << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) - << ImmOffs; + << ARM_AM::getAM2Offset(MO3.getImm()) + << markup(">"); + } + O << "]" << markup(">"); return; } - O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) - << getRegisterName(MO2.getReg()); + O << ", "; + O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())); + printRegName(O, MO2.getReg()); - if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm())) - << " #" << ShImm; + printRegImmShift(O, ARM_AM::getAM2ShiftOpc(MO3.getImm()), + ARM_AM::getAM2Offset(MO3.getImm()), UseMarkup); + O << "]" << markup(">"); } void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); const MCOperand &MO2 = MI->getOperand(Op+1); - O << "[" << getRegisterName(MO1.getReg()) << ", " - << getRegisterName(MO2.getReg()) << "]"; + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); + O << ", "; + printRegName(O, MO2.getReg()); + O << "]" << markup(">"); } void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); const MCOperand &MO2 = MI->getOperand(Op+1); - O << "[" << getRegisterName(MO1.getReg()) << ", " - << getRegisterName(MO2.getReg()) << ", lsl #1]"; + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); + O << ", "; + printRegName(O, MO2.getReg()); + O << ", lsl " << markup("<imm:") << "#1" << markup(">") << "]" << markup(">"); } void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, @@ -376,13 +400,13 @@ void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, return; } +#ifndef NDEBUG const MCOperand &MO3 = MI->getOperand(Op+2); unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm()); + assert(IdxMode != ARMII::IndexModePost && + "Should be pre or offset index op"); +#endif - if (IdxMode == ARMII::IndexModePost) { - printAM2PostIndexOp(MI, Op, O); - return; - } printAM2PreOrOffsetIndexOp(MI, Op, O); } @@ -394,19 +418,18 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, if (!MO1.getReg()) { unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); - O << '#' - << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) - << ImmOffs; + O << markup("<imm:") + << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) + << ImmOffs + << markup(">"); return; } - O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) - << getRegisterName(MO1.getReg()); + O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())); + printRegName(O, MO1.getReg()); - if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm())) - O << ", " - << 
ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm())) - << " #" << ShImm; + printRegImmShift(O, ARM_AM::getAM2ShiftOpc(MO2.getImm()), + ARM_AM::getAM2Offset(MO2.getImm()), UseMarkup); } //===--------------------------------------------------------------------===// @@ -419,18 +442,22 @@ void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op, const MCOperand &MO2 = MI->getOperand(Op+1); const MCOperand &MO3 = MI->getOperand(Op+2); - O << "[" << getRegisterName(MO1.getReg()) << "], "; + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); + O << "], " << markup(">"); if (MO2.getReg()) { - O << (char)ARM_AM::getAM3Op(MO3.getImm()) - << getRegisterName(MO2.getReg()); + O << (char)ARM_AM::getAM3Op(MO3.getImm()); + printRegName(O, MO2.getReg()); return; } unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()); - O << '#' + O << markup("<imm:") + << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) - << ImmOffs; + << ImmOffs + << markup(">"); } void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, @@ -439,23 +466,29 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, const MCOperand &MO2 = MI->getOperand(Op+1); const MCOperand &MO3 = MI->getOperand(Op+2); - O << '[' << getRegisterName(MO1.getReg()); + O << markup("<mem:") << '['; + printRegName(O, MO1.getReg()); if (MO2.getReg()) { - O << ", " << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) - << getRegisterName(MO2.getReg()) << ']'; + O << ", " << getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())); + printRegName(O, MO2.getReg()); + O << ']' << markup(">"); return; } - //If the op is sub we have to print the immediate even if it is 0 + //If the op is sub we have to print the immediate even if it is 0 unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()); ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm()); - - if (ImmOffs || (op == ARM_AM::sub)) - O << ", #" + + if (ImmOffs || (op == ARM_AM::sub)) { + O << ", " + << markup("<imm:") + << "#" << ARM_AM::getAddrOpcStr(op) - << ImmOffs; - O << ']'; + << ImmOffs + << markup(">"); + } + O << ']' << markup(">"); } void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, @@ -483,15 +516,15 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, const MCOperand &MO2 = MI->getOperand(OpNum+1); if (MO1.getReg()) { - O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) - << getRegisterName(MO1.getReg()); + O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())); + printRegName(O, MO1.getReg()); return; } unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); - O << '#' - << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) - << ImmOffs; + O << markup("<imm:") + << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs + << markup(">"); } void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, @@ -499,7 +532,9 @@ void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); unsigned Imm = MO.getImm(); - O << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff); + O << markup("<imm:") + << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff) + << markup(">"); } void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, @@ -507,7 +542,8 @@ void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); - O << (MO2.getImm() ? "" : "-") << getRegisterName(MO1.getReg()); + O << (MO2.getImm() ? 
"" : "-"); + printRegName(O, MO1.getReg()); } void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, @@ -515,7 +551,9 @@ void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); unsigned Imm = MO.getImm(); - O << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2); + O << markup("<imm:") + << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2) + << markup(">"); } @@ -536,16 +574,20 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, return; } - O << "[" << getRegisterName(MO1.getReg()); + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm()); unsigned Op = ARM_AM::getAM5Op(MO2.getImm()); if (ImmOffs || Op == ARM_AM::sub) { - O << ", #" + O << ", " + << markup("<imm:") + << "#" << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm())) - << ImmOffs * 4; + << ImmOffs * 4 + << markup(">"); } - O << "]"; + O << "]" << markup(">"); } void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, @@ -553,18 +595,21 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); - O << "[" << getRegisterName(MO1.getReg()); + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); if (MO2.getImm()) { // FIXME: Both darwin as and GNU as violate ARM docs here. O << ", :" << (MO2.getImm() << 3); } - O << "]"; + O << "]" << markup(">"); } void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - O << "[" << getRegisterName(MO1.getReg()) << "]"; + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); + O << "]" << markup(">"); } void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, @@ -573,8 +618,10 @@ void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, const MCOperand &MO = MI->getOperand(OpNum); if (MO.getReg() == 0) O << "!"; - else - O << ", " << getRegisterName(MO.getReg()); + else { + O << ", "; + printRegName(O, MO.getReg()); + } } void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI, @@ -585,7 +632,9 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI, int32_t lsb = CountTrailingZeros_32(v); int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb; assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!"); - O << '#' << lsb << ", #" << width; + O << markup("<imm:") << '#' << lsb << markup(">") + << ", " + << markup("<imm:") << '#' << width << markup(">"); } void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum, @@ -599,10 +648,18 @@ void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum, unsigned ShiftOp = MI->getOperand(OpNum).getImm(); bool isASR = (ShiftOp & (1 << 5)) != 0; unsigned Amt = ShiftOp & 0x1f; - if (isASR) - O << ", asr #" << (Amt == 0 ? 32 : Amt); - else if (Amt) - O << ", lsl #" << Amt; + if (isASR) { + O << ", asr " + << markup("<imm:") + << "#" << (Amt == 0 ? 
32 : Amt) + << markup(">"); + } + else if (Amt) { + O << ", lsl " + << markup("<imm:") + << "#" << Amt + << markup(">"); + } } void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, @@ -611,7 +668,7 @@ void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, if (Imm == 0) return; assert(Imm > 0 && Imm < 32 && "Invalid PKH shift immediate value!"); - O << ", lsl #" << Imm; + O << ", lsl " << markup("<imm:") << "#" << Imm << markup(">"); } void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, @@ -621,7 +678,7 @@ void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, if (Imm == 0) Imm = 32; assert(Imm > 0 && Imm <= 32 && "Invalid PKH shift immediate value!"); - O << ", asr #" << Imm; + O << ", asr " << markup("<imm:") << "#" << Imm << markup(">"); } void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum, @@ -629,7 +686,7 @@ void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum, O << "{"; for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) { if (i != OpNum) O << ", "; - O << getRegisterName(MI->getOperand(i).getReg()); + printRegName(O, MI->getOperand(i).getReg()); } O << "}"; } @@ -803,23 +860,29 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, int32_t OffImm = (int32_t)MO.getImm(); + O << markup("<imm:"); if (OffImm == INT32_MIN) O << "#-0"; else if (OffImm < 0) O << "#-" << -OffImm; else O << "#" << OffImm; + O << markup(">"); } void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - O << "#" << MI->getOperand(OpNum).getImm() * 4; + O << markup("<imm:") + << "#" << MI->getOperand(OpNum).getImm() * 4 + << markup(">"); } void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); - O << "#" << (Imm == 0 ? 32 : Imm); + O << markup("<imm:") + << "#" << (Imm == 0 ? 32 : Imm) + << markup(">"); } void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum, @@ -849,10 +912,13 @@ void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op, return; } - O << "[" << getRegisterName(MO1.getReg()); - if (unsigned RegNum = MO2.getReg()) - O << ", " << getRegisterName(RegNum); - O << "]"; + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); + if (unsigned RegNum = MO2.getReg()) { + O << ", "; + printRegName(O, RegNum); + } + O << "]" << markup(">"); } void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI, @@ -867,10 +933,15 @@ void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI, return; } - O << "[" << getRegisterName(MO1.getReg()); - if (unsigned ImmOffs = MO2.getImm()) - O << ", #" << ImmOffs * Scale; - O << "]"; + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); + if (unsigned ImmOffs = MO2.getImm()) { + O << ", " + << markup("<imm:") + << "#" << ImmOffs * Scale + << markup(">"); + } + O << "]" << markup(">"); } void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI, @@ -906,14 +977,12 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum, const MCOperand &MO2 = MI->getOperand(OpNum+1); unsigned Reg = MO1.getReg(); - O << getRegisterName(Reg); + printRegName(O, Reg); // Print the shift opc. 
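// Illustrative sketch (editor's note, separate from the patch). The
// printers keep meeting one convention, made explicit in
// translateShiftImm() above: an encoded lsr/asr amount of 0 means a 32-bit
// shift, so 0 prints as "#32" and "#32" assembles back to 0:
#include <cassert>
static unsigned decodeShiftAmt(unsigned Enc) { return Enc == 0 ? 32 : Enc; }
static unsigned encodeShiftAmt(unsigned Amt) { return Amt == 32 ? 0 : Amt; }
int main() {
  for (unsigned Amt = 1; Amt <= 32; ++Amt)
    assert(decodeShiftAmt(encodeShiftAmt(Amt)) == Amt); // lossless round trip
}
// End of editor's sketch.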
assert(MO2.isImm() && "Not a valid t2_so_reg value!"); - ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm()); - O << ", " << ARM_AM::getShiftOpcStr(ShOpc); - if (ShOpc != ARM_AM::rrx) - O << " #" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())); + printRegImmShift(O, ARM_AM::getSORegShOp(MO2.getImm()), + ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup); } void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, @@ -926,18 +995,27 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, return; } - O << "[" << getRegisterName(MO1.getReg()); + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); int32_t OffImm = (int32_t)MO2.getImm(); bool isSub = OffImm < 0; // Special value for #-0. All others are normal. if (OffImm == INT32_MIN) OffImm = 0; - if (isSub) - O << ", #-" << -OffImm; - else if (OffImm > 0) - O << ", #" << OffImm; - O << "]"; + if (isSub) { + O << ", " + << markup("<imm:") + << "#-" << -OffImm + << markup(">"); + } + else if (OffImm > 0) { + O << ", " + << markup("<imm:") + << "#" << OffImm + << markup(">"); + } + O << "]" << markup(">"); } void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, @@ -946,17 +1024,24 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); - O << "[" << getRegisterName(MO1.getReg()); + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); int32_t OffImm = (int32_t)MO2.getImm(); // Don't print +0. + if (OffImm != 0) + O << ", "; + if (OffImm != 0 && UseMarkup) + O << "<imm:"; if (OffImm == INT32_MIN) - O << ", #-0"; + O << "#-0"; else if (OffImm < 0) - O << ", #-" << -OffImm; + O << "#-" << -OffImm; else if (OffImm > 0) - O << ", #" << OffImm; - O << "]"; + O << "#" << OffImm; + if (OffImm != 0 && UseMarkup) + O << ">"; + O << "]" << markup(">"); } void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, @@ -970,20 +1055,27 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, return; } - O << "[" << getRegisterName(MO1.getReg()); + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); int32_t OffImm = (int32_t)MO2.getImm(); assert(((OffImm & 0x3) == 0) && "Not a valid immediate!"); // Don't print +0. + if (OffImm != 0) + O << ", "; + if (OffImm != 0 && UseMarkup) + O << "<imm:"; if (OffImm == INT32_MIN) - O << ", #-0"; + O << "#-0"; else if (OffImm < 0) - O << ", #-" << -OffImm; + O << "#-" << -OffImm; else if (OffImm > 0) - O << ", #" << OffImm; - O << "]"; + O << "#" << OffImm; + if (OffImm != 0 && UseMarkup) + O << ">"; + O << "]" << markup(">"); } void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI, @@ -992,10 +1084,15 @@ void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI, const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); - O << "[" << getRegisterName(MO1.getReg()); - if (MO2.getImm()) - O << ", #" << MO2.getImm() * 4; - O << "]"; + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); + if (MO2.getImm()) { + O << ", " + << markup("<imm:") + << "#" << MO2.getImm() * 4 + << markup(">"); + } + O << "]" << markup(">"); } void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, @@ -1003,11 +1100,12 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); int32_t OffImm = (int32_t)MO1.getImm(); - // Don't print +0. 
+ O << ", " << markup("<imm:"); if (OffImm < 0) - O << ", #-" << -OffImm; + O << "#-" << -OffImm; else - O << ", #" << OffImm; + O << "#" << OffImm; + O << markup(">"); } void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, @@ -1019,12 +1117,18 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, assert(((OffImm & 0x3) == 0) && "Not a valid immediate!"); // Don't print +0. + if (OffImm != 0) + O << ", "; + if (OffImm != 0 && UseMarkup) + O << "<imm:"; if (OffImm == INT32_MIN) - O << ", #-0"; + O << "#-0"; else if (OffImm < 0) - O << ", #-" << -OffImm; + O << "#-" << -OffImm; else if (OffImm > 0) - O << ", #" << OffImm; + O << "#" << OffImm; + if (OffImm != 0 && UseMarkup) + O << ">"; } void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, @@ -1034,23 +1138,30 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, const MCOperand &MO2 = MI->getOperand(OpNum+1); const MCOperand &MO3 = MI->getOperand(OpNum+2); - O << "[" << getRegisterName(MO1.getReg()); + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); assert(MO2.getReg() && "Invalid so_reg load / store address!"); - O << ", " << getRegisterName(MO2.getReg()); + O << ", "; + printRegName(O, MO2.getReg()); unsigned ShAmt = MO3.getImm(); if (ShAmt) { assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!"); - O << ", lsl #" << ShAmt; + O << ", lsl " + << markup("<imm:") + << "#" << ShAmt + << markup(">"); } - O << "]"; + O << "]" << markup(">"); } void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); - O << '#' << ARM_AM::getFPImmFloat(MO.getImm()); + O << markup("<imm:") + << '#' << ARM_AM::getFPImmFloat(MO.getImm()) + << markup(">"); } void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, @@ -1058,14 +1169,18 @@ void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, unsigned EncodedImm = MI->getOperand(OpNum).getImm(); unsigned EltBits; uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); - O << "#0x"; + O << markup("<imm:") + << "#0x"; O.write_hex(Val); + O << markup(">"); } void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); - O << "#" << Imm + 1; + O << markup("<imm:") + << "#" << Imm + 1 + << markup(">"); } void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum, @@ -1073,23 +1188,30 @@ void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum, unsigned Imm = MI->getOperand(OpNum).getImm(); if (Imm == 0) return; - O << ", ror #"; + O << ", ror " + << markup("<imm:") + << "#"; switch (Imm) { default: assert (0 && "illegal ror immediate!"); case 1: O << "8"; break; case 2: O << "16"; break; case 3: O << "24"; break; } + O << markup(">"); } void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - O << "#" << 16 - MI->getOperand(OpNum).getImm(); + O << markup("<imm:") + << "#" << 16 - MI->getOperand(OpNum).getImm() + << markup(">"); } void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - O << "#" << 32 - MI->getOperand(OpNum).getImm(); + O << markup("<imm:") + << "#" << 32 - MI->getOperand(OpNum).getImm() + << markup(">"); } void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, @@ -1099,7 +1221,9 @@ void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printVectorListOne(const 
MCInst *MI, unsigned OpNum, raw_ostream &O) { - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << "}"; } void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum, @@ -1107,7 +1231,11 @@ void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum, unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1); - O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}"; + O << "{"; + printRegName(O, Reg0); + O << ", "; + printRegName(O, Reg1); + O << "}"; } void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, @@ -1116,7 +1244,11 @@ void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2); - O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}"; + O << "{"; + printRegName(O, Reg0); + O << ", "; + printRegName(O, Reg1); + O << "}"; } void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum, @@ -1124,9 +1256,13 @@ void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum, // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 1); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 2); + O << "}"; } void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum, @@ -1134,16 +1270,23 @@ void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum, // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. 
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 1); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 2); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 3); + O << "}"; } void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[]}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << "[]}"; } void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, @@ -1152,7 +1295,11 @@ void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1); - O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}"; + O << "{"; + printRegName(O, Reg0); + O << "[], "; + printRegName(O, Reg1); + O << "[]}"; } void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI, @@ -1161,9 +1308,13 @@ void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI, // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 1); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 2); + O << "[]}"; } void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI, @@ -1172,10 +1323,15 @@ void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI, // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. 
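// Illustrative sketch (editor's note, separate from the patch; the free
// function is invented). All of these printVectorList* variants are one
// loop in disguise: walk D registers from a base at stride 1 (consecutive)
// or 2 (spaced), tagging each element with "[]" in the all-lanes forms:
#include <cstdio>
static void printDRegList(unsigned Base, unsigned Count, unsigned Stride,
                          bool AllLanes) {
  std::printf("{");
  for (unsigned i = 0; i != Count; ++i)
    std::printf("%sd%u%s", i ? ", " : "", Base + i * Stride,
                AllLanes ? "[]" : "");
  std::printf("}\n");
}
int main() {
  printDRegList(0, 4, 1, false); // {d0, d1, d2, d3}
  printDRegList(0, 3, 2, true);  // {d0[], d2[], d4[]}
}
// End of editor's sketch.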
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 1); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 2); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 3); + O << "[]}"; } void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI, @@ -1184,7 +1340,11 @@ void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2); - O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}"; + O << "{"; + printRegName(O, Reg0); + O << "[], "; + printRegName(O, Reg1); + O << "[]}"; } void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI, @@ -1193,9 +1353,13 @@ void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI, // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[]}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 2); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 4); + O << "[]}"; } void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI, @@ -1204,10 +1368,15 @@ void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI, // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "[]}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 2); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 4); + O << "[], "; + printRegName(O, MI->getOperand(OpNum).getReg() + 6); + O << "[]}"; } void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI, @@ -1216,9 +1385,13 @@ void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI, // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. 
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 2); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 4); + O << "}"; } void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, @@ -1227,8 +1400,13 @@ void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "}"; + O << "{"; + printRegName(O, MI->getOperand(OpNum).getReg()); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 2); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 4); + O << ", "; + printRegName(O, MI->getOperand(OpNum).getReg() + 6); + O << "}"; } diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 73d7bfd..b7bab5f 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -126,7 +126,8 @@ public: void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 68c47ac..1ba6ab0 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -593,7 +593,9 @@ public: const object::mach::CPUSubtypeARM Subtype; DarwinARMAsmBackend(const Target &T, const StringRef TT, object::mach::CPUSubtypeARM st) - : ARMAsmBackend(T, TT), Subtype(st) { } + : ARMAsmBackend(T, TT), Subtype(st) { + HasDataInCodeSupport = true; + } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createARMMachObjectWriter(OS, /*Is64Bit=*/false, @@ -687,6 +689,15 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef else if (TheTriple.getArchName() == "armv6" || TheTriple.getArchName() == "thumbv6") return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6); + else if (TheTriple.getArchName() == "armv7f" || + TheTriple.getArchName() == "thumbv7f") + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7F); + else if (TheTriple.getArchName() == "armv7k" || + TheTriple.getArchName() == "thumbv7k") + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7K); + else if (TheTriple.getArchName() == "armv7s" || + TheTriple.getArchName() == "thumbv7s") + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7S); 
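// Illustrative sketch (editor's note, separate from the patch; subtype
// strings stand in for the object::mach enumerators). The else-if chain
// above is a straight arch-name to Mach-O CPU subtype table, each ARM
// flavour paired with its Thumb spelling and V7 as the fallback:
#include <cstdio>
#include <cstring>
static const char *subtypeFor(const char *Arch) {
  static const struct { const char *A, *T, *Sub; } Map[] = {
    {"armv6",  "thumbv6",  "V6"},  {"armv7f", "thumbv7f", "V7F"},
    {"armv7k", "thumbv7k", "V7K"}, {"armv7s", "thumbv7s", "V7S"},
  };
  for (const auto &E : Map)
    if (!std::strcmp(Arch, E.A) || !std::strcmp(Arch, E.T))
      return E.Sub;
  return "V7"; // default, matching the final return below
}
int main() { std::printf("%s\n", subtypeFor("thumbv7s")); } // prints V7S
// End of editor's sketch.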
return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7); } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 7d6acbc..99e4f71 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -194,6 +194,10 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case ARM::fixup_arm_uncondbranch: Type = ELF::R_ARM_JUMP24; break; + case ARM::fixup_t2_condbranch: + case ARM::fixup_t2_uncondbranch: + Type = ELF::R_ARM_THM_JUMP24; + break; case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: Type = ELF::R_ARM_MOVT_PREL; @@ -242,6 +246,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_ARM_TARGET1: Type = ELF::R_ARM_TARGET1; break; + case MCSymbolRefExpr::VK_ARM_TARGET2: + Type = ELF::R_ARM_TARGET2; + break; } break; case ARM::fixup_arm_ldst_pcrel_12: diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index d32805e..c1aab9c 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -50,7 +50,6 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() { Code32Directive = ".code\t32"; WeakRefDirective = "\t.weak\t"; - LCOMMDirectiveType = LCOMM::NoAlignment; HasLEB128 = true; SupportsDebugInformation = true; diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 94f1082..d0e127a 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -35,8 +35,8 @@ STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created."); namespace { class ARMMCCodeEmitter : public MCCodeEmitter { - ARMMCCodeEmitter(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT + ARMMCCodeEmitter(const ARMMCCodeEmitter &) LLVM_DELETED_FUNCTION; + void operator=(const ARMMCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; const MCSubtargetInfo &STI; const MCContext &CTX; @@ -783,7 +783,7 @@ getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx, // Immediate is always encoded as positive. The 'U' bit controls add vs sub. if (Imm8 < 0) - Imm8 = -Imm8; + Imm8 = -(uint32_t)Imm8; // Scaled by 4. Imm8 /= 4; @@ -934,6 +934,10 @@ getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm()); unsigned SBits = getShiftOp(ShOp); + // While "lsr #32" and "asr #32" exist, they are encoded with a 0 in the shift + // amount. However, it would be an easy mistake to make so check here. 
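+  // Illustrative values (assumed, not from this change): a well-formed AM2
+  // operand for "asr #32" arrives here as ShOp == ARM_AM::asr with
+  // ShImm == 0, so any ShImm with bits set above 0x1f indicates a malformed
+  // operand rather than a legal 32-bit shift.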
+ assert((ShImm & ~0x1f) == 0 && "Out of range shift amount"); + // {16-13} = Rn // {12} = isAdd // {11-0} = shifter diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index a727e08..b404e6c 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -28,7 +28,7 @@ private: explicit ARMMCExpr(VariantKind _Kind, const MCExpr *_Expr) : Kind(_Kind), Expr(_Expr) {} - + public: /// @name Construction /// @{ @@ -67,9 +67,6 @@ public: static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; } - - static bool classof(const ARMMCExpr *) { return true; } - }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 5df84c8..00ffc94 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -71,6 +71,14 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { else // Use CPU to figure out the exact features. ARMArchFeature = "+v7"; + } else if (Len >= Idx+2 && TT[Idx+1] == 's') { + if (NoCPU) + // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk + // Swift + ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+t2xtpk"; + else + // Use CPU to figure out the exact features. + ARMArchFeature = "+v7"; } else { // v7 CPUs have lots of different feature sets. If no CPU is specified, // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index a51e0fa..2154c93 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -41,6 +41,12 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter { const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue); + bool requiresExternRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCFragment &Fragment, + unsigned RelocType, const MCSymbolData *SD, + uint64_t FixedValue); + public: ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) @@ -305,6 +311,46 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, Writer->addRelocation(Fragment->getParent(), MRE); } +bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCFragment &Fragment, + unsigned RelocType, + const MCSymbolData *SD, + uint64_t FixedValue) { + // Most cases can be identified purely from the symbol. + if (Writer->doesSymbolRequireExternRelocation(SD)) + return true; + int64_t Value = (int64_t)FixedValue; // The displacement is signed. + int64_t Range; + switch (RelocType) { + default: + return false; + case macho::RIT_ARM_Branch24Bit: + // PC pre-adjustment of 8 for these instructions. + Value -= 8; + // ARM BL/BLX has a 25-bit offset. + Range = 0x1ffffff; + break; + case macho::RIT_ARM_ThumbBranch22Bit: + // PC pre-adjustment of 4 for these instructions. + Value -= 4; + // Thumb BL/BLX has a 24-bit offset. + Range = 0xffffff; + } + // BL/BLX also use external relocations when an internal relocation + // would result in the target being out of range. This gives the linker + // enough information to generate a branch island. 
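+  // Worked example (illustrative numbers only): a Thumb BL whose target ends
+  // up ~20 MiB away fits the ARM window (+/-32 MiB, Range = 0x1ffffff) but
+  // not the Thumb window (+/-16 MiB, Range = 0xffffff), so the check below
+  // must fall back to an external relocation and let the linker build a
+  // branch island.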
+ const MCSectionData &SymSD = Asm.getSectionData( + SD->getSymbol().getSection()); + Value += Writer->getSectionAddress(&SymSD); + Value -= Writer->getSectionAddress(Fragment.getParent()); + // If the resultant value would be out of range for an internal relocation, + // use an external instead. + if (Value > Range || Value < -(Range + 1)) + return true; + return false; +} + void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, @@ -373,7 +419,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, } // Check whether we need an external or internal relocation. - if (Writer->doesSymbolRequireExternRelocation(SD)) { + if (requiresExternRelocation(Writer, Asm, *Fragment, RelocType, SD, + FixedValue)) { IsExtern = 1; Index = SD->getIndex(); @@ -410,7 +457,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, if (Type == macho::RIT_ARM_Half) { // The other-half value only gets populated for the movt and movw // relocation entries. - uint32_t Value = 0;; + uint32_t Value = 0; switch ((unsigned)Fixup.getKind()) { default: break; case ARM::fixup_arm_movw_lo16: diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp index ad60e32..70643bc 100644 --- a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp +++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp @@ -51,7 +51,8 @@ namespace { const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; - bool isA9; + bool isLikeA9; + bool isSwift; unsigned MIIdx; MachineInstr* LastMIs[4]; SmallPtrSet<MachineInstr*, 4> IgnoreStall; @@ -60,6 +61,7 @@ namespace { void pushStack(MachineInstr *MI); MachineInstr *getAccDefMI(MachineInstr *MI) const; unsigned getDefReg(MachineInstr *MI) const; + bool hasLoopHazard(MachineInstr *MI) const; bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const; bool FindMLxHazard(MachineInstr *MI); void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, @@ -135,6 +137,50 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const { return Reg; } +/// hasLoopHazard - Check whether an MLx instruction is chained to itself across +/// a single-MBB loop. +bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const { + unsigned Reg = MI->getOperand(1).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return false; + + MachineBasicBlock *MBB = MI->getParent(); + MachineInstr *DefMI = MRI->getVRegDef(Reg); + while (true) { +outer_continue: + if (DefMI->getParent() != MBB) + break; + + if (DefMI->isPHI()) { + for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) { + if (DefMI->getOperand(i + 1).getMBB() == MBB) { + unsigned SrcReg = DefMI->getOperand(i).getReg(); + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + DefMI = MRI->getVRegDef(SrcReg); + goto outer_continue; + } + } + } + } else if (DefMI->isCopyLike()) { + Reg = DefMI->getOperand(1).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + DefMI = MRI->getVRegDef(Reg); + continue; + } + } else if (DefMI->isInsertSubreg()) { + Reg = DefMI->getOperand(2).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + DefMI = MRI->getVRegDef(Reg); + continue; + } + } + + break; + } + + return DefMI == MI; +} + bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { // FIXME: Detect integer instructions properly. 
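+  // e.g. (illustrative pairing, not from this change): a vmla.f32 that reads
+  // the result of an in-flight vmul.f32 is exactly the kind of RAW hazard
+  // this test is meant to flag.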
const MCInstrDesc &MCID = MI->getDesc(); @@ -149,6 +195,19 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { return false; } +static bool isFpMulInstruction(unsigned Opcode) { + switch (Opcode) { + case ARM::VMULS: + case ARM::VMULfd: + case ARM::VMULfq: + case ARM::VMULD: + case ARM::VMULslfd: + case ARM::VMULslfq: + return true; + default: + return false; + } +} bool MLxExpansion::FindMLxHazard(MachineInstr *MI) { if (NumExpand >= ExpandLimit) @@ -171,6 +230,12 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) { return true; } + // On Swift, we mostly care about hazards from multiplication instructions + // writing the accumulator and the pipelining of loop iterations by out-of- + // order execution. + if (isSwift) + return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI); + if (IgnoreStall.count(MI)) return false; @@ -179,8 +244,8 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) { // preserves the in-order retirement of the instructions. // Look at the next few instructions, if *most* of them can cause hazards, // then the scheduler can't *fix* this, we'd better break up the VMLA. - unsigned Limit1 = isA9 ? 1 : 4; - unsigned Limit2 = isA9 ? 1 : 4; + unsigned Limit1 = isLikeA9 ? 1 : 4; + unsigned Limit2 = isLikeA9 ? 1 : 4; for (unsigned i = 1; i <= 4; ++i) { int Idx = ((int)MIIdx - i + 4) % 4; MachineInstr *NextMI = LastMIs[Idx]; @@ -316,7 +381,8 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { TRI = Fn.getTarget().getRegisterInfo(); MRI = &Fn.getRegInfo(); const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); - isA9 = STI->isCortexA9(); + isLikeA9 = STI->isLikeA9() || STI->isSwift(); + isSwift = STI->isSwift(); bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; diff --git a/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp index 03d5a9a..3396e8b 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp @@ -130,8 +130,7 @@ namespace { void printS10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16) - >> 16); + short value = MI->getOperand(OpNo).getImm(); assert((value >= -(1 << 9) && value <= (1 << 9) - 1) && "Invalid s10 argument"); O << value; @@ -140,8 +139,7 @@ namespace { void printU10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16) - >> 16); + short value = MI->getOperand(OpNo).getImm(); assert((value <= (1 << 10) - 1) && "Invalid u10 argument"); O << value; } diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp index fac806e1..f011995 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp +++ b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp @@ -22,7 +22,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" using namespace llvm; diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index c27caea..5d50610 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ 
b/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -67,8 +67,8 @@ namespace { //! ConstantSDNode predicate for signed 16-bit values /*! - \arg CN The constant SelectionDAG node holding the value - \arg Imm The returned 16-bit value, if returning true + \param CN The constant SelectionDAG node holding the value + \param Imm The returned 16-bit value, if returning true This predicate tests the value in \a CN to see whether it can be represented as a 16-bit, sign-extended quantity. Returns true if @@ -83,12 +83,10 @@ namespace { return true; } else if (vt == MVT::i32) { int32_t i_val = (int32_t) CN->getZExtValue(); - short s_val = (short) i_val; - return i_val == s_val; + return i_val == SignExtend32<16>(i_val); } else { int64_t i_val = (int64_t) CN->getZExtValue(); - short s_val = (short) i_val; - return i_val == s_val; + return i_val == SignExtend64<16>(i_val); } } @@ -99,9 +97,10 @@ namespace { EVT vt = FPN->getValueType(0); if (vt == MVT::f32) { int val = FloatToBits(FPN->getValueAPF().convertToFloat()); - int sval = (int) ((val << 16) >> 16); - Imm = (short) val; - return val == sval; + if (val == SignExtend32<16>(val)) { + Imm = (short) val; + return true; + } } return false; @@ -306,10 +305,10 @@ namespace { } /*! - \arg Op The ISD instruction operand - \arg N The address to be tested - \arg Base The base address - \arg Index The base address index + \param Op The ISD instruction operand + \param N The address to be tested + \param Base The base address + \param Index The base address index */ bool SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, @@ -376,10 +375,10 @@ SPUDAGToDAGISel::SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp, } /*! - \arg Op The ISD instruction (ignored) - \arg N The address to be tested - \arg Base Base address register/pointer - \arg Index Base address index + \param Op The ISD instruction (ignored) + \param N The address to be tested + \param Base Base address register/pointer + \param Index Base address index Examine the input address by a base register plus a signed 10-bit displacement, [r+I10] (D-form address). @@ -542,10 +541,10 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, } /*! - \arg Op The ISD instruction operand - \arg N The address operand - \arg Base The base pointer operand - \arg Index The offset/index operand + \param Op The ISD instruction operand + \param N The address operand + \param Base The base pointer operand + \param Index The offset/index operand If the address \a N can be expressed as an A-form or D-form address, returns false. Otherwise, creates two operands, Base and Index that will become the @@ -570,7 +569,7 @@ SPUDAGToDAGISel::SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base, Utility function to use with COPY_TO_REGCLASS instructions. 
Returns a SDValue to be used as the last parameter of a CurDAG->getMachineNode(COPY_TO_REGCLASS,..., ) function call - \arg VT the value type for which we want a register class + \param VT the value type for which we want a register class */ SDValue SPUDAGToDAGISel::getRC( MVT VT ) { switch( VT.SimpleTy ) { diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h index 7c4aa14..27d28b2 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h +++ b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h @@ -80,9 +80,9 @@ namespace llvm { return UseLargeMem; } - /// getTargetDataString - Return the pointer size and type alignment + /// getDataLayoutString - Return the pointer size and type alignment /// properties of this subtarget. - const char *getTargetDataString() const { + const char *getDataLayoutString() const { return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128" "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:64:128-v128:128:128" "-s:128:128-n32:64"; diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp index 54764f1..9183165 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -38,12 +38,13 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS), - DataLayout(Subtarget.getTargetDataString()), + DL(Subtarget.getDataLayoutString()), InstrInfo(*this), FrameLowering(Subtarget), TLInfo(*this), TSInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()) { + InstrItins(Subtarget.getInstrItineraryData()), + STTI(&TLInfo), VTTI(&TLInfo) { } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h index 3e5d38c..7f53ea6 100644 --- a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h +++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h @@ -20,7 +20,8 @@ #include "SPUSelectionDAGInfo.h" #include "SPUFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetTransformImpl.h" +#include "llvm/DataLayout.h" namespace llvm { @@ -28,12 +29,14 @@ namespace llvm { /// class SPUTargetMachine : public LLVMTargetMachine { SPUSubtarget Subtarget; - const TargetData DataLayout; + const DataLayout DL; SPUInstrInfo InstrInfo; SPUFrameLowering FrameLowering; SPUTargetLowering TLInfo; SPUSelectionDAGInfo TSInfo; InstrItineraryData InstrItins; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: SPUTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -70,13 +73,19 @@ public: return &InstrInfo.getRegisterInfo(); } - virtual const TargetData *getTargetData() const { - return &DataLayout; + virtual const DataLayout *getDataLayout() const { + return &DL; } virtual const InstrItineraryData *getInstrItineraryData() const { return &InstrItins; } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); diff --git a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp 
b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp index c8e757b..5c90990 100644 --- a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp +++ b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp @@ -285,14 +285,14 @@ void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) { Out << "GlobalValue::LinkerPrivateLinkage"; break; case GlobalValue::LinkerPrivateWeakLinkage: Out << "GlobalValue::LinkerPrivateWeakLinkage"; break; - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: - Out << "GlobalValue::LinkerPrivateWeakDefAutoLinkage"; break; case GlobalValue::AvailableExternallyLinkage: Out << "GlobalValue::AvailableExternallyLinkage "; break; case GlobalValue::LinkOnceAnyLinkage: Out << "GlobalValue::LinkOnceAnyLinkage "; break; case GlobalValue::LinkOnceODRLinkage: Out << "GlobalValue::LinkOnceODRLinkage "; break; + case GlobalValue::LinkOnceODRAutoHideLinkage: + Out << "GlobalValue::LinkOnceODRAutoHideLinkage"; break; case GlobalValue::WeakAnyLinkage: Out << "GlobalValue::WeakAnyLinkage"; break; case GlobalValue::WeakODRLinkage: @@ -474,13 +474,15 @@ void CppWriter::printAttributes(const AttrListPtr &PAL, Out << "AttributeWithIndex PAWI;"; nl(Out); for (unsigned i = 0; i < PAL.getNumSlots(); ++i) { unsigned index = PAL.getSlot(i).Index; - Attributes attrs = PAL.getSlot(i).Attrs; - Out << "PAWI.Index = " << index << "U; PAWI.Attrs = Attribute::None "; -#define HANDLE_ATTR(X) \ - if (attrs & Attribute::X) \ - Out << " | Attribute::" #X; \ - attrs &= ~Attribute::X; - + AttrBuilder attrs(PAL.getSlot(i).Attrs); + Out << "PAWI.Index = " << index << "U;\n"; + Out << " {\n AttrBuilder B;\n"; + +#define HANDLE_ATTR(X) \ + if (attrs.hasAttribute(Attributes::X)) \ + Out << " B.addAttribute(Attributes::" #X ");\n"; \ + attrs.removeAttribute(Attributes::X); + HANDLE_ATTR(SExt); HANDLE_ATTR(ZExt); HANDLE_ATTR(NoReturn); @@ -505,19 +507,18 @@ void CppWriter::printAttributes(const AttrListPtr &PAL, HANDLE_ATTR(ReturnsTwice); HANDLE_ATTR(UWTable); HANDLE_ATTR(NonLazyBind); + HANDLE_ATTR(MinSize); #undef HANDLE_ATTR - if (attrs & Attribute::StackAlignment) - Out << " | Attribute::constructStackAlignmentFromInt(" - << Attribute::getStackAlignmentFromAttrs(attrs) - << ")"; - attrs &= ~Attribute::StackAlignment; - assert(attrs == 0 && "Unhandled attribute!"); - Out << ";"; + if (attrs.hasAttribute(Attributes::StackAlignment)) + Out << " B.addStackAlignmentAttr(" << attrs.getStackAlignment() << ")\n"; + attrs.removeAttribute(Attributes::StackAlignment); + assert(!attrs.hasAttributes() && "Unhandled attribute!"); + Out << " PAWI.Attrs = Attributes::get(mod->getContext(), B);\n }"; nl(Out); Out << "Attrs.push_back(PAWI);"; nl(Out); } - Out << name << "_PAL = AttrListPtr::get(Attrs);"; + Out << name << "_PAL = AttrListPtr::get(mod->getContext(), Attrs);"; nl(Out); out(); nl(Out); Out << '}'; nl(Out); diff --git a/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h index 9cbe798..30d765d 100644 --- a/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h +++ b/contrib/llvm/lib/Target/CppBackend/CPPTargetMachine.h @@ -15,7 +15,7 @@ #define CPPTARGETMACHINE_H #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" namespace llvm { @@ -35,7 +35,7 @@ struct CPPTargetMachine : public TargetMachine { AnalysisID StartAfter, AnalysisID StopAfter); - virtual const TargetData *getTargetData() const { return 0; } + virtual const DataLayout *getDataLayout() const { return 0; } }; extern Target TheCppBackendTarget; diff 
--git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 5fa4740..c15bce6 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -46,7 +46,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.cpp index ba8e679..73f9d9a 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCallingConvLower.cpp @@ -16,7 +16,7 @@ #include "HexagonCallingConvLower.h" #include "Hexagon.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 703a128..1c891f1 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1350,6 +1350,8 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine } else { setOperationAction(ISD::BR_JT, MVT::Other, Expand); } + // Increase jump tables cutover to 5, was 4. + setMinimumJumpTableEntries(5); setOperationAction(ISD::BR_CC, MVT::i32, Expand); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td index e472d49..a64c7a1 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td @@ -56,6 +56,16 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, bits<1> isPredicated = 0; let TSFlags{6} = isPredicated; + // Dot new value store instructions. + bits<1> isNVStore = 0; + let TSFlags{8} = isNVStore; + + // Fields used for relation models. + string BaseOpcode = ""; + string CextOpcode = ""; + string PredSense = ""; + string PNewValue = ""; + string InputType = ""; // Input is "imm" or "reg" type. // *** The code above must match HexagonBaseInfo.h *** } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index c8f933d..8435440 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/MathExtras.h" #define GET_INSTRINFO_CTOR +#define GET_INSTRMAP_INFO #include "HexagonGenInstrInfo.inc" #include "HexagonGenDFAPacketizer.inc" @@ -1915,6 +1916,15 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { int HexagonInstrInfo:: getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { + enum Hexagon::PredSense inPredSense; + inPredSense = invertPredicate ? 
Hexagon::PredSense_false : + Hexagon::PredSense_true; + int CondOpcode = Hexagon::getPredOpcode(Opc, inPredSense); + if (CondOpcode >= 0) // Valid Conditional opcode/instruction + return CondOpcode; + + // This switch case will be removed once all the instructions have been + // modified to use relation maps. switch(Opc) { case Hexagon::TFR: return !invertPredicate ? Hexagon::TFR_cPt : @@ -1934,24 +1944,6 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { case Hexagon::JMP_EQriPt_nv_V4: return !invertPredicate ? Hexagon::JMP_EQriPt_nv_V4 : Hexagon::JMP_EQriNotPt_nv_V4; - case Hexagon::ADD_ri: - return !invertPredicate ? Hexagon::ADD_ri_cPt : - Hexagon::ADD_ri_cNotPt; - case Hexagon::ADD_rr: - return !invertPredicate ? Hexagon::ADD_rr_cPt : - Hexagon::ADD_rr_cNotPt; - case Hexagon::XOR_rr: - return !invertPredicate ? Hexagon::XOR_rr_cPt : - Hexagon::XOR_rr_cNotPt; - case Hexagon::AND_rr: - return !invertPredicate ? Hexagon::AND_rr_cPt : - Hexagon::AND_rr_cNotPt; - case Hexagon::OR_rr: - return !invertPredicate ? Hexagon::OR_rr_cPt : - Hexagon::OR_rr_cNotPt; - case Hexagon::SUB_rr: - return !invertPredicate ? Hexagon::SUB_rr_cPt : - Hexagon::SUB_rr_cNotPt; case Hexagon::COMBINE_rr: return !invertPredicate ? Hexagon::COMBINE_rr_cPt : Hexagon::COMBINE_rr_cNotPt; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td index c0c0df6..1d4a706 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td @@ -15,6 +15,18 @@ include "HexagonInstrFormats.td" include "HexagonImmediates.td" //===----------------------------------------------------------------------===// +// Classes used for relation maps. +//===----------------------------------------------------------------------===// +// PredRel - Filter class used to relate non-predicated instructions with their +// predicated forms. +class PredRel; +// PredNewRel - Filter class used to relate predicated instructions with their +// predicate-new forms. +class PredNewRel: PredRel; +// ImmRegRel - Filter class used to relate instructions having reg-reg form +// with their reg-imm counterparts. +class ImmRegRel; +//===----------------------------------------------------------------------===// // Hexagon Instruction Predicate Definitions. //===----------------------------------------------------------------------===// def HasV2T : Predicate<"Subtarget.hasV2TOps()">; @@ -148,37 +160,91 @@ multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> { } //===----------------------------------------------------------------------===// -// ALU32/ALU + +// ALU32/ALU (Instructions with register-register form) //===----------------------------------------------------------------------===// -// Add. 
-let isCommutable = 1, isPredicable = 1 in -def ADD_rr : ALU32_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = add($src1, $src2)", - [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; +multiclass ALU32_Pbase<string mnemonic, bit isNot, + bit isPredNew> { -let isPredicable = 1 in -def ADD_ri : ALU32_ri<(outs IntRegs:$dst), - (ins IntRegs:$src1, s16Imm:$src2), - "$dst = add($src1, #$src2)", - [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$src1), - s16ImmPred:$src2))]>; + let PNewValue = #!if(isPredNew, "new", "") in + def #NAME# : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs: $src3), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ", + ") $dst = ")#mnemonic#"($src2, $src3)", + []>; +} -// Logical operations. -let isPredicable = 1 in -def XOR_rr : ALU32_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = xor($src1, $src2)", - [(set (i32 IntRegs:$dst), (xor (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; +multiclass ALU32_Pred<string mnemonic, bit PredNot> { + let PredSense = #!if(PredNot, "false", "true") in { + defm _c#NAME# : ALU32_Pbase<mnemonic, PredNot, 0>; + // Predicate new + defm _cdn#NAME# : ALU32_Pbase<mnemonic, PredNot, 1>; + } +} -let isCommutable = 1, isPredicable = 1 in -def AND_rr : ALU32_rr<(outs IntRegs:$dst), +let InputType = "reg" in +multiclass ALU32_base<string mnemonic, string CextOp, SDNode OpNode> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_rr in { + let isPredicable = 1 in + def #NAME# : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = and($src1, $src2)", - [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; + "$dst = "#mnemonic#"($src1, $src2)", + [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; + + let neverHasSideEffects = 1, isPredicated = 1 in { + defm Pt : ALU32_Pred<mnemonic, 0>; + defm NotPt : ALU32_Pred<mnemonic, 1>; + } + } +} + +let isCommutable = 1 in { + defm ADD_rr : ALU32_base<"add", "ADD", add>, ImmRegRel, PredNewRel; + defm AND_rr : ALU32_base<"and", "AND", and>, ImmRegRel, PredNewRel; + defm XOR_rr : ALU32_base<"xor", "XOR", xor>, ImmRegRel, PredNewRel; + defm OR_rr : ALU32_base<"or", "OR", or>, ImmRegRel, PredNewRel; +} + +defm SUB_rr : ALU32_base<"sub", "SUB", sub>, ImmRegRel, PredNewRel; + +//===----------------------------------------------------------------------===// +// ALU32/ALU (ADD with register-immediate form) +//===----------------------------------------------------------------------===// +multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> { + let PNewValue = #!if(isPredNew, "new", "") in + def #NAME# : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, s8Imm: $src3), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ", + ") $dst = ")#mnemonic#"($src2, #$src3)", + []>; +} + +multiclass ALU32ri_Pred<string mnemonic, bit PredNot> { + let PredSense = #!if(PredNot, "false", "true") in { + defm _c#NAME# : ALU32ri_Pbase<mnemonic, PredNot, 0>; + // Predicate new + defm _cdn#NAME# : ALU32ri_Pbase<mnemonic, PredNot, 1>; + } +} + +let InputType = "imm" in +multiclass ALU32ri_base<string mnemonic, string CextOp, SDNode OpNode> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_ri in { + let isPredicable = 1 in + def #NAME# : ALU32_ri<(outs IntRegs:$dst), + (ins IntRegs:$src1, s16Imm:$src2), + "$dst = "#mnemonic#"($src1, #$src2)", + [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1), + 
(s16ImmPred:$src2)))]>; + + let neverHasSideEffects = 1, isPredicated = 1 in { + defm Pt : ALU32ri_Pred<mnemonic, 0>; + defm NotPt : ALU32ri_Pred<mnemonic, 1>; + } + } +} + +defm ADD_ri : ALU32ri_base<"add", "ADD", add>, ImmRegRel, PredNewRel; def OR_ri : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2), @@ -197,13 +263,6 @@ def AND_ri : ALU32_ri<(outs IntRegs:$dst), [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1), s10ImmPred:$src2))]>; -let isCommutable = 1, isPredicable = 1 in -def OR_rr : ALU32_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = or($src1, $src2)", - [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; - // Negate. def NEG : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), "$dst = neg($src1)", @@ -214,14 +273,6 @@ def NOP : ALU32_rr<(outs), (ins), "nop", []>; -// Subtract. -let isPredicable = 1 in -def SUB_rr : ALU32_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = sub($src1, $src2)", - [(set (i32 IntRegs:$dst), (sub (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; - // Rd32=sub(#s10,Rs32) def SUB_ri : ALU32_ri<(outs IntRegs:$dst), (ins s10Imm:$src1, IntRegs:$src2), @@ -348,56 +399,6 @@ def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), // ALU32/PRED + //===----------------------------------------------------------------------===// -// Conditional add. -let neverHasSideEffects = 1, isPredicated = 1 in -def ADD_ri_cPt : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, s8Imm:$src3), - "if ($src1) $dst = add($src2, #$src3)", - []>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def ADD_ri_cNotPt : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, s8Imm:$src3), - "if (!$src1) $dst = add($src2, #$src3)", - []>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def ADD_ri_cdnPt : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, s8Imm:$src3), - "if ($src1.new) $dst = add($src2, #$src3)", - []>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def ADD_ri_cdnNotPt : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, s8Imm:$src3), - "if (!$src1.new) $dst = add($src2, #$src3)", - []>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def ADD_rr_cPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst = add($src2, $src3)", - []>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def ADD_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst = add($src2, $src3)", - []>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def ADD_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst = add($src2, $src3)", - []>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def ADD_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst = add($src2, $src3)", - []>; - - // Conditional combine. let neverHasSideEffects = 1, isPredicated = 1 in @@ -424,108 +425,6 @@ def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst), "if (!$src1.new) $dst = combine($src2, $src3)", []>; -// Conditional logical operations. 
-
-let isPredicated = 1 in
-def XOR_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if ($src1) $dst = xor($src2, $src3)",
-            []>;
-
-let isPredicated = 1 in
-def XOR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if (!$src1) $dst = xor($src2, $src3)",
-            []>;
-
-let isPredicated = 1 in
-def XOR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if ($src1.new) $dst = xor($src2, $src3)",
-            []>;
-
-let isPredicated = 1 in
-def XOR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if (!$src1.new) $dst = xor($src2, $src3)",
-            []>;
-
-let isPredicated = 1 in
-def AND_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if ($src1) $dst = and($src2, $src3)",
-            []>;
-
-let isPredicated = 1 in
-def AND_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if (!$src1) $dst = and($src2, $src3)",
-            []>;
-
-let isPredicated = 1 in
-def AND_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if ($src1.new) $dst = and($src2, $src3)",
-            []>;
-
-let isPredicated = 1 in
-def AND_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if (!$src1.new) $dst = and($src2, $src3)",
-            []>;
-
-let isPredicated = 1 in
-def OR_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if ($src1) $dst = or($src2, $src3)",
-            []>;
-
-let isPredicated = 1 in
-def OR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if (!$src1) $dst = or($src2, $src3)",
-            []>;
-
-let isPredicated = 1 in
-def OR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if ($src1.new) $dst = or($src2, $src3)",
-            []>;
-
-let isPredicated = 1 in
-def OR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if (!$src1.new) $dst = or($src2, $src3)",
-            []>;
-
-
-// Conditional subtract.
-
-let isPredicated = 1 in
-def SUB_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if ($src1) $dst = sub($src2, $src3)",
-            []>;
-
-let isPredicated = 1 in
-def SUB_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if (!$src1) $dst = sub($src2, $src3)",
-            []>;
-
-let isPredicated = 1 in
-def SUB_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if ($src1.new) $dst = sub($src2, $src3)",
-            []>;
-
-let isPredicated = 1 in
-def SUB_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if (!$src1.new) $dst = sub($src2, $src3)",
-            []>;
-
-
 // Conditional transfer.
 let neverHasSideEffects = 1, isPredicated = 1 in
 def TFR_cPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2),
@@ -3546,4 +3445,31 @@ include "HexagonInstrInfoV5.td"
 // V5 Instructions -
 //===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate non-predicate instructions with their
+// predicated formats - true and false.
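+// For example, the ADD_rr row (KeyCol "") resolves to ADD_rr_cPt in the
+// "true" column and ADD_rr_cNotPt in the "false" column, which is what the
+// Hexagon::getPredOpcode(Opc, PredSense) query in
+// getMatchingCondBranchOpcode relies on (example names taken from the
+// multiclass defs above).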
+//
+
+def getPredOpcode : InstrMapping {
+  let FilterClass = "PredRel";
+  // Instructions with the same BaseOpcode and isNVStore values form a row.
+  let RowFields = ["BaseOpcode", "isNVStore", "PNewValue"];
+  // Instructions with the same predicate sense form a column.
+  let ColFields = ["PredSense"];
+  // The key column is the unpredicated instructions.
+  let KeyCol = [""];
+  // Value columns are PredSense=true and PredSense=false.
+  let ValueCols = [["true"], ["false"]];
+}
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate predicated instructions with their .new
+// format.
+//
+def getPredNewOpcode : InstrMapping {
+  let FilterClass = "PredNewRel";
+  let RowFields = ["BaseOpcode", "PredSense", "isNVStore"];
+  let ColFields = ["PNewValue"];
+  let KeyCol = [""];
+  let ValueCols = [["new"]];
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
new file mode 100644
index 0000000..0e9ef48
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -0,0 +1,681 @@
+//===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineScheduler schedules machine instructions after phi elimination. It
+// preserves LiveIntervals so it can be invoked before register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "misched"
+
+#include "HexagonMachineScheduler.h"
+
+#include <queue>
+
+using namespace llvm;
+
+/// Platform-specific modifications to the DAG.
+void VLIWMachineScheduler::postprocessDAG() {
+  SUnit* LastSequentialCall = NULL;
+  // Currently we only catch the situation when a compare gets scheduled
+  // before the preceding call.
+  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+    // Remember the call.
+    if (SUnits[su].getInstr()->isCall())
+      LastSequentialCall = &(SUnits[su]);
+    // Look for a compare that defines a predicate.
+    else if (SUnits[su].getInstr()->isCompare() && LastSequentialCall)
+      SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier));
+  }
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
+/// It is _not_ precise (stateful); it is more like
+/// another heuristic. Many corner cases were
+/// determined empirically.
+bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
+  if (!SU || !SU->getInstr())
+    return false;
+
+  // First see if the pipeline could receive this instruction
+  // in the current cycle.
+  switch (SU->getInstr()->getOpcode()) {
+  default:
+    if (!ResourcesModel->canReserveResources(SU->getInstr()))
+      return false;
+  case TargetOpcode::EXTRACT_SUBREG:
+  case TargetOpcode::INSERT_SUBREG:
+  case TargetOpcode::SUBREG_TO_REG:
+  case TargetOpcode::REG_SEQUENCE:
+  case TargetOpcode::IMPLICIT_DEF:
+  case TargetOpcode::COPY:
+  case TargetOpcode::INLINEASM:
+    break;
+  }
+
+  // Now see if there are no other dependencies to instructions already
+  // in the packet.
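+  // That is: if any SU already placed in Packet has a data edge to this SU,
+  // the two cannot issue in the same VLIW bundle, so SU has to wait for the
+  // next packet; order-only (ctrl) edges are deliberately ignored below.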
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i) { + if (Packet[i]->Succs.size() == 0) + continue; + for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), + E = Packet[i]->Succs.end(); I != E; ++I) { + // Since we do not add pseudos to packets, might as well + // ignore order dependencies. + if (I->isCtrl()) + continue; + + if (I->getSUnit() == SU) + return false; + } + } + return true; +} + +/// Keep track of available resources. +bool VLIWResourceModel::reserveResources(SUnit *SU) { + bool startNewCycle = false; + // Artificially reset state. + if (!SU) { + ResourcesModel->clearResources(); + Packet.clear(); + TotalPackets++; + return false; + } + // If this SU does not fit in the packet + // start a new one. + if (!isResourceAvailable(SU)) { + ResourcesModel->clearResources(); + Packet.clear(); + TotalPackets++; + startNewCycle = true; + } + + switch (SU->getInstr()->getOpcode()) { + default: + ResourcesModel->reserveResources(SU->getInstr()); + break; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + case TargetOpcode::PROLOG_LABEL: + case TargetOpcode::EH_LABEL: + case TargetOpcode::COPY: + case TargetOpcode::INLINEASM: + break; + } + Packet.push_back(SU); + +#ifndef NDEBUG + DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n"); + for (unsigned i = 0, e = Packet.size(); i != e; ++i) { + DEBUG(dbgs() << "\t[" << i << "] SU("); + DEBUG(dbgs() << Packet[i]->NodeNum << ")\t"); + DEBUG(Packet[i]->getInstr()->dump()); + } +#endif + + // If packet is now full, reset the state so in the next cycle + // we start fresh. + if (Packet.size() >= SchedModel->getIssueWidth()) { + ResourcesModel->clearResources(); + Packet.clear(); + TotalPackets++; + startNewCycle = true; + } + + return startNewCycle; +} + +/// schedule - Called back from MachineScheduler::runOnMachineFunction +/// after setting up the current scheduling region. [RegionBegin, RegionEnd) +/// only includes instructions that have DAG nodes, not scheduling boundaries. +void VLIWMachineScheduler::schedule() { + DEBUG(dbgs() + << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber() + << " " << BB->getName() + << " in_func " << BB->getParent()->getFunction()->getName() + << " at loop depth " << MLI.getLoopDepth(BB) + << " \n"); + + buildDAGWithRegPressure(); + + // Postprocess the DAG to add platform specific artificial dependencies. + postprocessDAG(); + + // To view Height/Depth correctly, they should be accessed at least once. 
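+  // (SUnit heights/depths are computed lazily and cached on first call, so
+  // the DEBUG-only loops below double as a way of forcing that computation.)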
+ DEBUG(unsigned maxH = 0; + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + if (SUnits[su].getHeight() > maxH) + maxH = SUnits[su].getHeight(); + dbgs() << "Max Height " << maxH << "\n";); + DEBUG(unsigned maxD = 0; + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + if (SUnits[su].getDepth() > maxD) + maxD = SUnits[su].getDepth(); + dbgs() << "Max Depth " << maxD << "\n";); + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + + initQueues(); + + bool IsTopNode = false; + while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + if (!checkSchedLimit()) + break; + + scheduleMI(SU, IsTopNode); + + updateQueues(SU, IsTopNode); + } + assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); + + placeDebugValues(); +} + +void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { + DAG = static_cast<VLIWMachineScheduler*>(dag); + SchedModel = DAG->getSchedModel(); + TRI = DAG->TRI; + Top.init(DAG, SchedModel); + Bot.init(DAG, SchedModel); + + // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or + // are disabled, then these HazardRecs will be disabled. + const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries(); + const TargetMachine &TM = DAG->MF.getTarget(); + Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + + Top.ResourceModel = new VLIWResourceModel(TM, DAG->getSchedModel()); + Bot.ResourceModel = new VLIWResourceModel(TM, DAG->getSchedModel()); + + assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); +} + +void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { + if (SU->isScheduled) + return; + + for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; + unsigned MinLatency = I->getMinLatency(); +#ifndef NDEBUG + Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); +#endif + if (SU->TopReadyCycle < PredReadyCycle + MinLatency) + SU->TopReadyCycle = PredReadyCycle + MinLatency; + } + Top.releaseNode(SU, SU->TopReadyCycle); +} + +void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) { + if (SU->isScheduled) + return; + + assert(SU->getInstr() && "Scheduled SUnit must have instr"); + + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; + unsigned MinLatency = I->getMinLatency(); +#ifndef NDEBUG + Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); +#endif + if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) + SU->BotReadyCycle = SuccReadyCycle + MinLatency; + } + Bot.releaseNode(SU, SU->BotReadyCycle); +} + +/// Does this SU have a hazard within the current instruction group. +/// +/// The scheduler supports two modes of hazard recognition. The first is the +/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that +/// supports highly complicated in-order reservation tables +/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. +/// +/// The second is a streamlined mechanism that checks for hazards based on +/// simple counters that the scheduler itself maintains. It explicitly checks +/// for instruction dispatch limitations, including the number of micro-ops that +/// can dispatch per cycle. +/// +/// TODO: Also check whether the SU must start a new group. 
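+///
+/// Example with assumed numbers: at an issue width of 4, an SU expanding to
+/// 2 micro-ops is reported as a hazard once IssueCount has already reached 3
+/// in the current group (3 + 2 > 4).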
+bool ConvergingVLIWScheduler::SchedBoundary::checkHazard(SUnit *SU) { + if (HazardRec->isEnabled()) + return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; + + unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); + if (IssueCount + uops > SchedModel->getIssueWidth()) + return true; + + return false; +} + +void ConvergingVLIWScheduler::SchedBoundary::releaseNode(SUnit *SU, + unsigned ReadyCycle) { + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + // Check for interlocks first. For the purpose of other heuristics, an + // instruction that cannot issue appears as if it's not in the ReadyQueue. + if (ReadyCycle > CurrCycle || checkHazard(SU)) + + Pending.push(SU); + else + Available.push(SU); +} + +/// Move the boundary of scheduled code by one cycle. +void ConvergingVLIWScheduler::SchedBoundary::bumpCycle() { + unsigned Width = SchedModel->getIssueWidth(); + IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; + + assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); + unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); + + if (!HazardRec->isEnabled()) { + // Bypass HazardRec virtual calls. + CurrCycle = NextCycle; + } else { + // Bypass getHazardType calls in case of long latency. + for (; CurrCycle != NextCycle; ++CurrCycle) { + if (isTop()) + HazardRec->AdvanceCycle(); + else + HazardRec->RecedeCycle(); + } + } + CheckPending = true; + + DEBUG(dbgs() << "*** " << Available.getName() << " cycle " + << CurrCycle << '\n'); +} + +/// Move the boundary of scheduled code by one SUnit. +void ConvergingVLIWScheduler::SchedBoundary::bumpNode(SUnit *SU) { + bool startNewCycle = false; + + // Update the reservation table. + if (HazardRec->isEnabled()) { + if (!isTop() && SU->isCall) { + // Calls are scheduled with their preceding instructions. For bottom-up + // scheduling, clear the pipeline state before emitting. + HazardRec->Reset(); + } + HazardRec->EmitInstruction(SU); + } + + // Update DFA model. + startNewCycle = ResourceModel->reserveResources(SU); + + // Check the instruction group dispatch limit. + // TODO: Check if this SU must end a dispatch group. + IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); + if (startNewCycle) { + DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); + bumpCycle(); + } + else + DEBUG(dbgs() << "*** IssueCount " << IssueCount + << " at cycle " << CurrCycle << '\n'); +} + +/// Release pending ready nodes in to the available queue. This makes them +/// visible to heuristics. +void ConvergingVLIWScheduler::SchedBoundary::releasePending() { + // If the available queue is empty, it is safe to reset MinReadyCycle. + if (Available.empty()) + MinReadyCycle = UINT_MAX; + + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = Pending.size(); i != e; ++i) { + SUnit *SU = *(Pending.begin()+i); + unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; + + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + if (ReadyCycle > CurrCycle) + continue; + + if (checkHazard(SU)) + continue; + + Available.push(SU); + Pending.remove(Pending.begin()+i); + --i; --e; + } + CheckPending = false; +} + +/// Remove SU from the ready set for this boundary. 
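+/// The SU may be parked in either Available or Pending; the assert below
+/// catches bookkeeping bugs where it is in neither.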
+void ConvergingVLIWScheduler::SchedBoundary::removeReady(SUnit *SU) { + if (Available.isInQueue(SU)) + Available.remove(Available.find(SU)); + else { + assert(Pending.isInQueue(SU) && "bad ready count"); + Pending.remove(Pending.find(SU)); + } +} + +/// If this queue only has one ready candidate, return it. As a side effect, +/// advance the cycle until at least one node is ready. If multiple instructions +/// are ready, return NULL. +SUnit *ConvergingVLIWScheduler::SchedBoundary::pickOnlyChoice() { + if (CheckPending) + releasePending(); + + for (unsigned i = 0; Available.empty(); ++i) { + assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && + "permanent hazard"); (void)i; + ResourceModel->reserveResources(0); + bumpCycle(); + releasePending(); + } + if (Available.size() == 1) + return *Available.begin(); + return NULL; +} + +#ifndef NDEBUG +void ConvergingVLIWScheduler::traceCandidate(const char *Label, + const ReadyQueue &Q, + SUnit *SU, PressureElement P) { + dbgs() << Label << " " << Q.getName() << " "; + if (P.isValid()) + dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease + << " "; + else + dbgs() << " "; + SU->dump(DAG); +} +#endif + +/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor +/// of SU, return it, otherwise return null. +static SUnit *getSingleUnscheduledPred(SUnit *SU) { + SUnit *OnlyAvailablePred = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit &Pred = *I->getSUnit(); + if (!Pred.isScheduled) { + // We found an available, but not scheduled, predecessor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + return 0; + OnlyAvailablePred = &Pred; + } + } + return OnlyAvailablePred; +} + +/// getSingleUnscheduledSucc - If there is exactly one unscheduled successor +/// of SU, return it, otherwise return null. +static SUnit *getSingleUnscheduledSucc(SUnit *SU) { + SUnit *OnlyAvailableSucc = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + SUnit &Succ = *I->getSUnit(); + if (!Succ.isScheduled) { + // We found an available, but not scheduled, successor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ) + return 0; + OnlyAvailableSucc = &Succ; + } + } + return OnlyAvailableSucc; +} + +// Constants used to denote relative importance of +// heuristic components for cost computation. +static const unsigned PriorityOne = 200; +static const unsigned PriorityTwo = 100; +static const unsigned PriorityThree = 50; +static const unsigned PriorityFour = 20; +static const unsigned ScaleTwo = 10; +static const unsigned FactorOne = 2; + +/// Single point to compute overall scheduling cost. +/// TODO: More heuristics will be used soon. +int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, + SchedCandidate &Candidate, + RegPressureDelta &Delta, + bool verbose) { + // Initial trivial priority. + int ResCount = 1; + + // Do not waste time on a node that is already scheduled. + if (!SU || SU->isScheduled) + return ResCount; + + // Forced priority is high. + if (SU->isScheduleHigh) + ResCount += PriorityOne; + + // Critical path first. + if (Q.getID() == TopQID) { + ResCount += (SU->getHeight() * ScaleTwo); + + // If resources are available for it, multiply the + // chance of scheduling. 
+ if (Top.ResourceModel->isResourceAvailable(SU)) + ResCount <<= FactorOne; + } else { + ResCount += (SU->getDepth() * ScaleTwo); + + // If resources are available for it, multiply the + // chance of scheduling. + if (Bot.ResourceModel->isResourceAvailable(SU)) + ResCount <<= FactorOne; + } + + unsigned NumNodesBlocking = 0; + if (Q.getID() == TopQID) { + // How many SUs does it block from scheduling? + // Look at all of the successors of this node. + // Count the number of nodes that + // this node is the sole unscheduled node for. + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (getSingleUnscheduledPred(I->getSUnit()) == SU) + ++NumNodesBlocking; + } else { + // How many unscheduled predecessors block this node? + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (getSingleUnscheduledSucc(I->getSUnit()) == SU) + ++NumNodesBlocking; + } + ResCount += (NumNodesBlocking * ScaleTwo); + + // Factor in reg pressure as a heuristic. + ResCount -= (Delta.Excess.UnitIncrease*PriorityThree); + ResCount -= (Delta.CriticalMax.UnitIncrease*PriorityThree); + + DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")"); + + return ResCount; +} + +/// Pick the best candidate from the top queue. +/// +/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during +/// DAG building. To adjust for the current scheduling location we need to +/// maintain the number of vreg uses remaining to be top-scheduled. +ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler:: +pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, + SchedCandidate &Candidate) { + DEBUG(Q.dump()); + + // getMaxPressureDelta temporarily modifies the tracker. + RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); + + // BestSU remains NULL if no top candidates beat the best existing candidate. + CandResult FoundCandidate = NoCand; + for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + RegPressureDelta RPDelta; + TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + + int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false); + + // Initialize the candidate if needed. + if (!Candidate.SU) { + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = NodeOrder; + continue; + } + + // Best cost. + if (CurrentCost > Candidate.SCost) { + DEBUG(traceCandidate("CCAND", Q, *I)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = BestCost; + continue; + } + + // Fall through to original instruction order. + // Only consider node order if Candidate was chosen from this Q. + if (FoundCandidate == NoCand) + continue; + } + return FoundCandidate; +} + +/// Pick the best candidate node from either the top or bottom queue. +SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { + // Schedule as far as possible in the direction of no choice. This is most + // efficient, but also provides the best heuristics for CriticalPSets. + if (SUnit *SU = Bot.pickOnlyChoice()) { + IsTopNode = false; + return SU; + } + if (SUnit *SU = Top.pickOnlyChoice()) { + IsTopNode = true; + return SU; + } + SchedCandidate BotCand; + // Prefer bottom scheduling when heuristics are silent. 
+  CandResult BotResult = pickNodeFromQueue(Bot.Available,
+                                           DAG->getBotRPTracker(), BotCand);
+  assert(BotResult != NoCand && "failed to find the first candidate");
+
+  // If either Q has a single candidate that provides the least increase in
+  // Excess pressure, we can immediately schedule from that Q.
+  //
+  // RegionCriticalPSets summarizes the pressure within the scheduled region and
+  // affects picking from either Q. If scheduling in one direction must
+  // increase pressure for one of the excess PSets, then schedule in that
+  // direction first to provide more freedom in the other direction.
+  if (BotResult == SingleExcess || BotResult == SingleCritical) {
+    IsTopNode = false;
+    return BotCand.SU;
+  }
+  // Check if the top Q has a better candidate.
+  SchedCandidate TopCand;
+  CandResult TopResult = pickNodeFromQueue(Top.Available,
+                                           DAG->getTopRPTracker(), TopCand);
+  assert(TopResult != NoCand && "failed to find the first candidate");
+
+  if (TopResult == SingleExcess || TopResult == SingleCritical) {
+    IsTopNode = true;
+    return TopCand.SU;
+  }
+  // If either Q has a single candidate that minimizes pressure above the
+  // original region's pressure, pick it.
+  if (BotResult == SingleMax) {
+    IsTopNode = false;
+    return BotCand.SU;
+  }
+  if (TopResult == SingleMax) {
+    IsTopNode = true;
+    return TopCand.SU;
+  }
+  if (TopCand.SCost > BotCand.SCost) {
+    IsTopNode = true;
+    return TopCand.SU;
+  }
+  // Otherwise prefer the bottom candidate in node order.
+  IsTopNode = false;
+  return BotCand.SU;
+}
+
+/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
+SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
+  if (DAG->top() == DAG->bottom()) {
+    assert(Top.Available.empty() && Top.Pending.empty() &&
+           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+    return NULL;
+  }
+  SUnit *SU;
+  if (llvm::ForceTopDown) {
+    SU = Top.pickOnlyChoice();
+    if (!SU) {
+      SchedCandidate TopCand;
+      CandResult TopResult =
+        pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
+      assert(TopResult != NoCand && "failed to find the first candidate");
+      (void)TopResult;
+      SU = TopCand.SU;
+    }
+    IsTopNode = true;
+  } else if (llvm::ForceBottomUp) {
+    SU = Bot.pickOnlyChoice();
+    if (!SU) {
+      SchedCandidate BotCand;
+      CandResult BotResult =
+        pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
+      assert(BotResult != NoCand && "failed to find the first candidate");
+      (void)BotResult;
+      SU = BotCand.SU;
+    }
+    IsTopNode = false;
+  } else {
+    SU = pickNodeBidrectional(IsTopNode);
+  }
+  if (SU->isTopReady())
+    Top.removeReady(SU);
+  if (SU->isBottomReady())
+    Bot.removeReady(SU);
+
+  DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
+        << " Scheduling Instruction in cycle "
+        << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
+        SU->dump(DAG));
+  return SU;
+}
+
+/// Update the scheduler's state after scheduling a node. This is the same node
+/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
+/// to update its state based on the current cycle before MachineSchedStrategy
+/// does.
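+/// Concretely, this records the node's ready cycle and lets the matching
+/// boundary advance its cycle and DFA resource state via bumpNode().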
+void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+  if (IsTopNode) {
+    SU->TopReadyCycle = Top.CurrCycle;
+    Top.bumpNode(SU);
+  } else {
+    SU->BotReadyCycle = Bot.CurrCycle;
+    Bot.bumpNode(SU);
+  }
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h
new file mode 100644
index 0000000..fe0242a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -0,0 +1,244 @@
+//===-- HexagonMachineScheduler.h - Custom Hexagon MI scheduler. ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Custom Hexagon MI scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMACHINESCHEDULER_H
+#define HEXAGONMACHINESCHEDULER_H
+
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
+
+using namespace llvm;
+
+namespace llvm {
+//===----------------------------------------------------------------------===//
+// ConvergingVLIWScheduler - Implementation of the standard
+// MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+class VLIWResourceModel {
+  /// ResourcesModel - Represents VLIW state.
+  /// Not limited to VLIW targets per se, but assumes
+  /// definition of a DFA by the target.
+  DFAPacketizer *ResourcesModel;
+
+  const TargetSchedModel *SchedModel;
+
+  /// Local packet/bundle model. Purely
+  /// internal to the MI scheduler at this time.
+  std::vector<SUnit*> Packet;
+
+  /// Total packets created.
+  unsigned TotalPackets;
+
+public:
+  VLIWResourceModel(const TargetMachine &TM, const TargetSchedModel *SM) :
+      SchedModel(SM), TotalPackets(0) {
+    ResourcesModel = TM.getInstrInfo()->CreateTargetScheduleState(&TM, NULL);
+
+    // This hard requirement could be relaxed,
+    // but for now do not proceed without a DFA.
+    assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+    Packet.resize(SchedModel->getIssueWidth());
+    Packet.clear();
+    ResourcesModel->clearResources();
+  }
+
+  ~VLIWResourceModel() {
+    delete ResourcesModel;
+  }
+
+  void resetPacketState() {
+    Packet.clear();
+  }
+
+  void resetDFA() {
+    ResourcesModel->clearResources();
+  }
+
+  void reset() {
+    Packet.clear();
+    ResourcesModel->clearResources();
+  }
+
+  bool isResourceAvailable(SUnit *SU);
+  bool reserveResources(SUnit *SU);
+  unsigned getTotalPackets() const { return TotalPackets; }
+};
+
+/// Extend the standard ScheduleDAGMI to provide more context and override the
+/// top-level schedule() driver.
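+/// schedule() drives the scheduling loop; postprocessDAG() gives Hexagon a
+/// hook for target-specific DAG mutations.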
+class VLIWMachineScheduler : public ScheduleDAGMI {
+public:
+  VLIWMachineScheduler(MachineSchedContext *C, MachineSchedStrategy *S):
+    ScheduleDAGMI(C, S) {}
+
+  /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
+  /// time to do some work.
+  virtual void schedule();
+  /// Perform platform specific DAG postprocessing.
+  void postprocessDAG();
+};
+
+/// ConvergingVLIWScheduler shrinks the unscheduled zone using heuristics
+/// to balance the schedule.
+class ConvergingVLIWScheduler : public MachineSchedStrategy {
+
+  /// Store the state used by ConvergingVLIWScheduler heuristics, required
+  /// for the lifetime of one invocation of pickNode().
+  struct SchedCandidate {
+    // The best SUnit candidate.
+    SUnit *SU;
+
+    // Register pressure values for the best candidate.
+    RegPressureDelta RPDelta;
+
+    // Best scheduling cost.
+    int SCost;
+
+    SchedCandidate(): SU(NULL), SCost(0) {}
+  };
+  /// Represent the type of SchedCandidate found within a single queue.
+  enum CandResult {
+    NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure,
+    BestCost};
+
+  /// Each scheduling boundary is associated with ready queues. It tracks the
+  /// current cycle in whichever direction it has moved, and maintains the
+  /// state of "hazards" and other interlocks at the current cycle.
+  struct SchedBoundary {
+    VLIWMachineScheduler *DAG;
+    const TargetSchedModel *SchedModel;
+
+    ReadyQueue Available;
+    ReadyQueue Pending;
+    bool CheckPending;
+
+    ScheduleHazardRecognizer *HazardRec;
+    VLIWResourceModel *ResourceModel;
+
+    unsigned CurrCycle;
+    unsigned IssueCount;
+
+    /// MinReadyCycle - Cycle of the soonest available instruction.
+    unsigned MinReadyCycle;
+
+    // Remember the greatest min operand latency.
+    unsigned MaxMinLatency;
+
+    /// Pending queues extend the ready queues with the same ID and the
+    /// PendingFlag set.
+    SchedBoundary(unsigned ID, const Twine &Name):
+      DAG(0), SchedModel(0), Available(ID, Name+".A"),
+      Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name+".P"),
+      CheckPending(false), HazardRec(0), ResourceModel(0),
+      CurrCycle(0), IssueCount(0),
+      MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
+
+    ~SchedBoundary() {
+      delete ResourceModel;
+      delete HazardRec;
+    }
+
+    void init(VLIWMachineScheduler *dag, const TargetSchedModel *smodel) {
+      DAG = dag;
+      SchedModel = smodel;
+    }
+
+    bool isTop() const {
+      return Available.getID() == ConvergingVLIWScheduler::TopQID;
+    }
+
+    bool checkHazard(SUnit *SU);
+
+    void releaseNode(SUnit *SU, unsigned ReadyCycle);
+
+    void bumpCycle();
+
+    void bumpNode(SUnit *SU);
+
+    void releasePending();
+
+    void removeReady(SUnit *SU);
+
+    SUnit *pickOnlyChoice();
+  };
+
+  VLIWMachineScheduler *DAG;
+  const TargetSchedModel *SchedModel;
+  const TargetRegisterInfo *TRI;
+
+  // State of the top and bottom scheduled instruction boundaries.
+  SchedBoundary Top;
+  SchedBoundary Bot;
+
+public:
+  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
+  enum {
+    TopQID = 1,
+    BotQID = 2,
+    LogMaxQID = 2
+  };
+
+  ConvergingVLIWScheduler():
+    DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
+
+  virtual void initialize(ScheduleDAGMI *dag);
+
+  virtual SUnit *pickNode(bool &IsTopNode);
+
+  virtual void schedNode(SUnit *SU, bool IsTopNode);
+
+  virtual void releaseTopNode(SUnit *SU);
+
+  virtual void releaseBottomNode(SUnit *SU);
+
+  unsigned ReportPackets() {
+    return Top.ResourceModel->getTotalPackets() +
+           Bot.ResourceModel->getTotalPackets();
+  }
+
+protected:
+  SUnit *pickNodeBidrectional(bool &IsTopNode);
+
+  int SchedulingCost(ReadyQueue &Q,
+                     SUnit *SU, SchedCandidate &Candidate,
+                     RegPressureDelta &Delta, bool verbose);
+
+  CandResult pickNodeFromQueue(ReadyQueue &Q,
+                               const RegPressureTracker &RPTracker,
+                               SchedCandidate &Candidate);
+#ifndef NDEBUG
+  void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU,
+                      PressureElement P = PressureElement());
+#endif
+};
+
+} // namespace
+
+
+#endif // HEXAGONMACHINESCHEDULER_H
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 7ece408..1e91c39 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -337,7 +337,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
 
   DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n"
                << "********** Function: "
-               << MF.getFunction()->getName() << "\n");
+               << MF.getName() << "\n");
 
 #if 0
   // for now disable this, if we move NewValueJump before register
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
index 55cbc09..a295015 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -109,6 +109,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
   MRI = &MF.getRegInfo();
 
   DenseMap<unsigned, unsigned> PeepholeMap;
+  DenseMap<unsigned, std::pair<unsigned, unsigned> > PeepholeDoubleRegsMap;
 
   if (DisableHexagonPeephole) return false;
 
@@ -117,6 +118,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
        MBBb != MBBe; ++MBBb) {
     MachineBasicBlock* MBB = MBBb;
     PeepholeMap.clear();
+    PeepholeDoubleRegsMap.clear();
 
     // Traverse the basic block.
     for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
@@ -140,6 +142,24 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
         }
       }
 
+      // Look for the sequence below:
+      //   %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32
+      //   %vregIntReg = COPY %vregDoubleReg1:subreg_loreg.
+      // and convert it into:
+      //   %vregIntReg = COPY %vregDoubleReg0:subreg_hireg.
+      if (MI->getOpcode() == Hexagon::LSRd_ri) {
+        assert(MI->getNumOperands() == 3);
+        MachineOperand &Dst = MI->getOperand(0);
+        MachineOperand &Src1 = MI->getOperand(1);
+        MachineOperand &Src2 = MI->getOperand(2);
+        if (Src2.getImm() != 32)
+          continue;
+        unsigned DstReg = Dst.getReg();
+        unsigned SrcReg = Src1.getReg();
+        PeepholeDoubleRegsMap[DstReg] =
+          std::make_pair(SrcReg, 1/*Hexagon::subreg_hireg*/);
+      }
+
       // Look for P=NOT(P).
       if (!DisablePNotP &&
           (MI->getOpcode() == Hexagon::NOT_p)) {
@@ -178,6 +198,21 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
               // Change the 1st operand.
              MI->RemoveOperand(1);
              MI->addOperand(MachineOperand::CreateReg(PeepholeSrc, false));
+            } else {
+              DenseMap<unsigned, std::pair<unsigned, unsigned> >::iterator DI =
+                PeepholeDoubleRegsMap.find(SrcReg);
+              if (DI != PeepholeDoubleRegsMap.end()) {
+                std::pair<unsigned,unsigned> PeepholeSrc = DI->second;
+                MI->RemoveOperand(1);
+                MI->addOperand(MachineOperand::CreateReg(PeepholeSrc.first,
+                                                         false /*isDef*/,
+                                                         false /*isImp*/,
+                                                         false /*isKill*/,
+                                                         false /*isDead*/,
+                                                         false /*isUndef*/,
+                                                         false /*isEarlyClobber*/,
+                                                         PeepholeSrc.second));
+              }
             }
           }
         }
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 2c23674..3742486 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -310,6 +310,58 @@ void HexagonRegisterInfo::getInitialFrameState(std::vector<MachineMove>
   Moves.push_back(MachineMove(0, Dst, Src));
 }
 
+// Get the weight in units of pressure for this register class.
+const RegClassWeight &
+HexagonRegisterInfo::getRegClassWeight(const TargetRegisterClass *RC) const {
+  // Each TargetRegisterClass has a per-register weight and a weight limit,
+  // which must be less than the limits of its pressure sets.
+  static const RegClassWeight RCWeightTable[] = {
+    {1, 32}, // IntRegs
+    {1, 8},  // CRRegs
+    {1, 4},  // PredRegs
+    {2, 16}, // DoubleRegs
+    {0, 0} };
+  return RCWeightTable[RC->getID()];
+}
+
+/// Get the number of dimensions of register pressure.
+unsigned HexagonRegisterInfo::getNumRegPressureSets() const {
+  return 4;
+}
+
+/// Get the name of this register unit pressure set.
+const char *HexagonRegisterInfo::getRegPressureSetName(unsigned Idx) const {
+  static const char *const RegPressureSetName[] = {
+    "IntRegsRegSet",
+    "CRRegsRegSet",
+    "PredRegsRegSet",
+    "DoubleRegsRegSet"
+  };
+  assert((Idx < 4) && "Index out of bounds");
+  return RegPressureSetName[Idx];
+}
+
+/// Get the register unit pressure limit for this dimension.
+/// This limit must be adjusted dynamically for reserved registers.
+unsigned HexagonRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
+  static const int RegPressureLimit[] = { 16, 4, 2, 8 };
+  assert((Idx < 4) && "Index out of bounds");
+  return RegPressureLimit[Idx];
+}
+
+const int*
+HexagonRegisterInfo::getRegClassPressureSets(const TargetRegisterClass *RC)
+  const {
+  static const int RCSetsTable[] = {
+    0, -1,  // IntRegs
+    1, -1,  // CRRegs
+    2, -1,  // PredRegs
+    0, -1,  // DoubleRegs
+    -1 };
+  static const unsigned RCSetStartTable[] = { 0, 2, 4, 6, 0 };
+  unsigned SetListStart = RCSetStartTable[RC->getID()];
+  return &RCSetsTable[SetListStart];
+}
 unsigned HexagonRegisterInfo::getEHExceptionRegister() const {
   llvm_unreachable("What is the exception register");
 }
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
index 85355ae..8820d13 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -87,6 +87,11 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
 
   // Exception handling queries.
unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; + const RegClassWeight &getRegClassWeight(const TargetRegisterClass *RC) const; + unsigned getNumRegPressureSets() const; + const char *getRegPressureSetName(unsigned Idx) const; + unsigned getRegPressureSetLimit(unsigned Idx) const; + const int* getRegClassPressureSets(const TargetRegisterClass *RC) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp index 2468f0b..4d93dd1 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp @@ -50,7 +50,7 @@ bool HexagonRemoveExtendArgs::runOnFunction(Function &F) { unsigned Idx = 1; for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI, ++Idx) { - if (F.paramHasAttr(Idx, Attribute::SExt)) { + if (F.getParamAttributes(Idx).hasAttribute(Attributes::SExt)) { Argument* Arg = AI; if (!isa<PointerType>(Arg->getType())) { for (Instruction::use_iterator UI = Arg->use_begin(); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td index d1076b8..b5ff69a 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td @@ -47,6 +47,7 @@ def HexagonModel : SchedMachineModel { // Max issue per cycle == bundle width. let IssueWidth = 4; let Itineraries = HexagonItineraries; + let LoadLatency = 1; } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td index 9b41126..5668ae8 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td @@ -58,6 +58,7 @@ def HexagonModelV4 : SchedMachineModel { // Max issue per cycle == bundle width. let IssueWidth = 4; let Itineraries = HexagonItinerariesV4; + let LoadLatency = 1; } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index 5d087db..4bacb8f 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -40,28 +40,27 @@ EnableIEEERndNear( HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS): HexagonGenSubtargetInfo(TT, CPU, FS), - HexagonArchVersion(V2), CPUString(CPU.str()) { - ParseSubtargetFeatures(CPU, FS); - switch(HexagonArchVersion) { - case HexagonSubtarget::V2: - break; - case HexagonSubtarget::V3: - EnableV3 = true; - break; - case HexagonSubtarget::V4: - break; - case HexagonSubtarget::V5: - break; - default: - // If the programmer has not specified a Hexagon version, default - // to -mv4. + // If the programmer has not specified a Hexagon version, default to -mv4. 
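+  // The CPU string now selects the architecture version directly; an
+  // unrecognized name is a hard error instead of the old silent default.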
+ if (CPUString.empty()) CPUString = "hexagonv4"; - HexagonArchVersion = HexagonSubtarget::V4; - break; + + if (CPUString == "hexagonv2") { + HexagonArchVersion = V2; + } else if (CPUString == "hexagonv3") { + EnableV3 = true; + HexagonArchVersion = V3; + } else if (CPUString == "hexagonv4") { + HexagonArchVersion = V4; + } else if (CPUString == "hexagonv5") { + HexagonArchVersion = V5; + } else { + llvm_unreachable("Unrecognized Hexagon processor version"); } + ParseSubtargetFeatures(CPUString, FS); + // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index a7b291f..30866e9 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -14,6 +14,7 @@ #include "HexagonTargetMachine.h" #include "Hexagon.h" #include "HexagonISelLowering.h" +#include "HexagonMachineScheduler.h" #include "llvm/Module.h" #include "llvm/CodeGen/Passes.h" #include "llvm/PassManager.h" @@ -29,6 +30,11 @@ opt<bool> DisableHardwareLoops( "disable-hexagon-hwloops", cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); +static cl:: +opt<bool> DisableHexagonMISched("disable-hexagon-misched", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon MI Scheduling")); + /// HexagonTargetMachineModule - Note that this is used on hosts that /// cannot link in a library unless there are references into the /// library. In particular, it seems that it is not possible to get @@ -42,6 +48,13 @@ extern "C" void LLVMInitializeHexagonTarget() { RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget); } +static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) { + return new VLIWMachineScheduler(C, new ConvergingVLIWScheduler()); +} + +static MachineSchedRegistry +SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", + createVLIWMachineSched); /// HexagonTargetMachine ctor - Create an ILP32 architecture model. /// @@ -55,13 +68,14 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, CodeModel::Model CM, CodeGenOpt::Level OL) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - DataLayout("e-p:32:32:32-" + DL("e-p:32:32:32-" "i64:64:64-i32:32:32-i16:16:16-i1:32:32-" "f64:64:64-f32:32:32-a0:0-n32") , Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget), - InstrItins(&Subtarget.getInstrItineraryData()) { + InstrItins(&Subtarget.getInstrItineraryData()), + STTI(&TLInfo), VTTI(&TLInfo) { setMCUseCFI(false); } @@ -74,7 +88,7 @@ bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) { PM.add(createDeadCodeEliminationPass()); PM.add(createConstantPropagationPass()); PM.add(createLoopUnrollPass()); - PM.add(createLoopStrengthReducePass(getTargetLowering())); + PM.add(createLoopStrengthReducePass()); return true; } @@ -83,7 +97,13 @@ namespace { class HexagonPassConfig : public TargetPassConfig { public: HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) { + // Enable MI scheduler. 
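+    // Unless disabled on the command line, turn on the MachineScheduler pass
+    // and install the VLIW converging scheduler as its default factory.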
+ if (!DisableHexagonMISched) { + enablePass(&MachineSchedulerID); + MachineSchedRegistry::setDefault(createVLIWMachineSched); + } + } HexagonTargetMachine &getHexagonTargetMachine() const { return getTM<HexagonTargetMachine>(); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h index 0336965..7a4215c 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h @@ -20,20 +20,23 @@ #include "HexagonSelectionDAGInfo.h" #include "HexagonFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" +#include "llvm/Target/TargetTransformImpl.h" namespace llvm { class Module; class HexagonTargetMachine : public LLVMTargetMachine { - const TargetData DataLayout; // Calculates type size & alignment. + const DataLayout DL; // Calculates type size & alignment. HexagonSubtarget Subtarget; HexagonInstrInfo InstrInfo; HexagonTargetLowering TLInfo; HexagonSelectionDAGInfo TSInfo; HexagonFrameLowering FrameLowering; const InstrItineraryData* InstrItins; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU, @@ -68,7 +71,15 @@ public: return &TSInfo; } - virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } + + virtual const DataLayout *getDataLayout() const { return &DL; } static unsigned getModuleMatchQuality(const Module &M); // Pass Pipeline Configuration. diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index 32cc709..f4d7761 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -16,7 +16,7 @@ #include "HexagonTargetMachine.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/DerivedTypes.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/ELF.h" @@ -73,7 +73,7 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) { Type *Ty = GV->getType()->getElementType(); - return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty)); + return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty)); } return false; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index a03ed03..3d5f685 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -3474,8 +3474,8 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // 1. Two loads unless they are volatile. // 2. Two stores in V4 unless they are volatile. else if ((DepType == SDep::Order) && - !I->hasVolatileMemoryRef() && - !J->hasVolatileMemoryRef()) { + !I->hasOrderedMemoryRef() && + !J->hasOrderedMemoryRef()) { if (QRI->Subtarget.hasV4TOps() && // hexagonv4 allows dual store. 
MCIDI.mayStore() && MCIDJ.mayStore()) { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVarargsCallingConvention.h b/contrib/llvm/lib/Target/Hexagon/HexagonVarargsCallingConvention.h index 9305c27..c607b5d 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonVarargsCallingConvention.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonVarargsCallingConvention.h @@ -75,9 +75,9 @@ static bool CC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT, const Type* ArgTy = LocVT.getTypeForEVT(State.getContext()); unsigned Alignment = - State.getTarget().getTargetData()->getABITypeAlignment(ArgTy); + State.getTarget().getDataLayout()->getABITypeAlignment(ArgTy); unsigned Size = - State.getTarget().getTargetData()->getTypeSizeInBits(ArgTy) / 8; + State.getTarget().getDataLayout()->getTypeSizeInBits(ArgTy) / 8; // If it's passed by value, then we need the size of the aggregate not of // the pointer. @@ -130,9 +130,9 @@ static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT, const Type* ArgTy = LocVT.getTypeForEVT(State.getContext()); unsigned Alignment = - State.getTarget().getTargetData()->getABITypeAlignment(ArgTy); + State.getTarget().getDataLayout()->getABITypeAlignment(ArgTy); unsigned Size = - State.getTarget().getTargetData()->getTypeSizeInBits(ArgTy) / 8; + State.getTarget().getDataLayout()->getTypeSizeInBits(ArgTy) / 8; unsigned Offset3 = State.AllocateStack(Size, Alignment); State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3, diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp index d6e6c36..86f75d1 100644 --- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp @@ -24,7 +24,7 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Target &T, StringRef TT) { HasLEB128 = true; PrivateGlobalPrefix = ".L"; - LCOMMDirectiveType = LCOMM::ByteAlignment; + LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment; InlineAsmStart = "# InlineAsm Start"; InlineAsmEnd = "# InlineAsm End"; ZeroDirective = "\t.space\t"; diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index 38fb0e8..f7809ca 100644 --- a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -44,9 +44,10 @@ class MBlazeAsmParser : public MCTargetAsmParser { bool ParseDirectiveWord(unsigned Size, SMLoc L); - bool MatchAndEmitInstruction(SMLoc IDLoc, + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out); + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm); /// @name Auto-generated Match Functions /// { @@ -56,12 +57,12 @@ class MBlazeAsmParser : public MCTargetAsmParser { /// } - public: MBlazeAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) : MCTargetAsmParser(), Parser(_Parser) {} - virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, + virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); virtual bool ParseDirective(AsmToken DirectiveID); @@ -313,14 +314,13 @@ static unsigned MatchRegisterName(StringRef Name); /// } // bool MBlazeAsmParser:: -MatchAndEmitInstruction(SMLoc IDLoc, +MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out) { + MCStreamer 
&Out, unsigned &ErrorInfo,
                        bool MatchingInlineAsm) {
   MCInst Inst;
-  SMLoc ErrorLoc;
-  unsigned ErrorInfo;
-
-  switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) {
+  switch (MatchInstructionImpl(Operands, Inst, ErrorInfo,
+                               MatchingInlineAsm)) {
   default: break;
   case Match_Success:
     Out.EmitInstruction(Inst);
@@ -329,10 +329,8 @@ MatchAndEmitInstruction(SMLoc IDLoc,
     return Error(IDLoc, "instruction use requires an option to be enabled");
   case Match_MnemonicFail:
     return Error(IDLoc, "unrecognized instruction mnemonic");
-  case Match_ConversionFail:
-    return Error(IDLoc, "unable to convert operands to instruction");
-  case Match_InvalidOperand:
-    ErrorLoc = IDLoc;
+  case Match_InvalidOperand: {
+    SMLoc ErrorLoc = IDLoc;
     if (ErrorInfo != ~0U) {
       if (ErrorInfo >= Operands.size())
         return Error(IDLoc, "too few operands for instruction");
@@ -343,6 +341,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
 
     return Error(ErrorLoc, "invalid operand for instruction");
   }
+  }
 
   llvm_unreachable("Implement any new match types added!");
 }
@@ -479,7 +478,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 
 /// Parse an mblaze instruction mnemonic followed by its operands.
 bool MBlazeAsmParser::
-ParseInstruction(StringRef Name, SMLoc NameLoc,
+ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
                  SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   // The first operand is the token for the instruction name
   size_t dotLoc = Name.find('.');
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index e9f340f..b679a31 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -34,7 +34,7 @@
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
deleted file mode 100644
index e3c7236..0000000
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-//===-- MBlazeELFWriterInfo.cpp - ELF Writer Info for the MBlaze backend --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements ELF writer information for the MBlaze backend.
-// -//===----------------------------------------------------------------------===// - -#include "MBlazeELFWriterInfo.h" -#include "MBlazeRelocations.h" -#include "llvm/Function.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" - -using namespace llvm; - -//===----------------------------------------------------------------------===// -// Implementation of the MBlazeELFWriterInfo class -//===----------------------------------------------------------------------===// - -MBlazeELFWriterInfo::MBlazeELFWriterInfo(TargetMachine &TM) - : TargetELFWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64, - TM.getTargetData()->isLittleEndian()) { -} - -MBlazeELFWriterInfo::~MBlazeELFWriterInfo() {} - -unsigned MBlazeELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { - switch (MachineRelTy) { - case MBlaze::reloc_pcrel_word: - return ELF::R_MICROBLAZE_64_PCREL; - case MBlaze::reloc_absolute_word: - return ELF::R_MICROBLAZE_NONE; - default: - llvm_unreachable("unknown mblaze machine relocation type"); - } -} - -long int MBlazeELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, - long int Modifier) const { - switch (RelTy) { - case ELF::R_MICROBLAZE_32_PCREL: - return Modifier - 4; - case ELF::R_MICROBLAZE_32: - return Modifier; - default: - llvm_unreachable("unknown mblaze relocation type"); - } -} - -unsigned MBlazeELFWriterInfo::getRelocationTySize(unsigned RelTy) const { - // FIXME: Most of these sizes are guesses based on the name - switch (RelTy) { - case ELF::R_MICROBLAZE_32: - case ELF::R_MICROBLAZE_32_PCREL: - case ELF::R_MICROBLAZE_32_PCREL_LO: - case ELF::R_MICROBLAZE_32_LO: - case ELF::R_MICROBLAZE_SRO32: - case ELF::R_MICROBLAZE_SRW32: - case ELF::R_MICROBLAZE_32_SYM_OP_SYM: - case ELF::R_MICROBLAZE_GOTOFF_32: - return 32; - - case ELF::R_MICROBLAZE_64_PCREL: - case ELF::R_MICROBLAZE_64: - case ELF::R_MICROBLAZE_GOTPC_64: - case ELF::R_MICROBLAZE_GOT_64: - case ELF::R_MICROBLAZE_PLT_64: - case ELF::R_MICROBLAZE_GOTOFF_64: - return 64; - } - - return 0; -} - -bool MBlazeELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { - // FIXME: Most of these are guesses based on the name - switch (RelTy) { - case ELF::R_MICROBLAZE_32_PCREL: - case ELF::R_MICROBLAZE_64_PCREL: - case ELF::R_MICROBLAZE_32_PCREL_LO: - case ELF::R_MICROBLAZE_GOTPC_64: - return true; - } - - return false; -} - -unsigned MBlazeELFWriterInfo::getAbsoluteLabelMachineRelTy() const { - return MBlaze::reloc_absolute_word; -} - -long int MBlazeELFWriterInfo::computeRelocation(unsigned SymOffset, - unsigned RelOffset, - unsigned RelTy) const { - assert((RelTy == ELF::R_MICROBLAZE_32_PCREL || - RelTy == ELF::R_MICROBLAZE_64_PCREL) && - "computeRelocation unknown for this relocation type"); - return SymOffset - (RelOffset + 4); -} diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.h deleted file mode 100644 index a314eb7..0000000 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.h +++ /dev/null @@ -1,59 +0,0 @@ -//===-- MBlazeELFWriterInfo.h - ELF Writer Info for MBlaze ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF writer information for the MBlaze backend. 
-// -//===----------------------------------------------------------------------===// - -#ifndef MBLAZE_ELF_WRITER_INFO_H -#define MBLAZE_ELF_WRITER_INFO_H - -#include "llvm/Target/TargetELFWriterInfo.h" - -namespace llvm { - class TargetMachine; - - class MBlazeELFWriterInfo : public TargetELFWriterInfo { - public: - MBlazeELFWriterInfo(TargetMachine &TM); - virtual ~MBlazeELFWriterInfo(); - - /// getRelocationType - Returns the target specific ELF Relocation type. - /// 'MachineRelTy' contains the object code independent relocation type - virtual unsigned getRelocationType(unsigned MachineRelTy) const; - - /// hasRelocationAddend - True if the target uses an addend in the - /// ELF relocation entry. - virtual bool hasRelocationAddend() const { return false; } - - /// getDefaultAddendForRelTy - Gets the default addend value for a - /// relocation entry based on the target ELF relocation type. - virtual long int getDefaultAddendForRelTy(unsigned RelTy, - long int Modifier = 0) const; - - /// getRelTySize - Returns the size of relocatable field in bits - virtual unsigned getRelocationTySize(unsigned RelTy) const; - - /// isPCRelativeRel - True if the relocation type is pc relative - virtual bool isPCRelativeRel(unsigned RelTy) const; - - /// getJumpTableRelocationTy - Returns the machine relocation type used - /// to reference a jumptable. - virtual unsigned getAbsoluteLabelMachineRelTy() const; - - /// computeRelocation - Some relocatable fields could be relocated - /// directly, avoiding the relocation symbol emission, compute the - /// final relocation value for this symbol. - virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset, - unsigned RelTy) const; - }; - -} // end llvm namespace - -#endif // MBLAZE_ELF_WRITER_INFO_H diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp index d2f14a5..9e467bf 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp @@ -23,7 +23,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp index 91aaf94..1c2e3b2 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp @@ -83,7 +83,7 @@ bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const { #undef GET_INTRINSIC_OVERLOAD_TABLE } -/// This defines the "getAttributes(ID id)" method. +/// This defines the "getAttributes(LLVMContext &C, ID id)" method. 
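+/// (The extra LLVMContext parameter follows the updated AttrListPtr API; the
+/// getDeclaration() call site below now passes M->getContext().)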
#define GET_INTRINSIC_ATTRIBUTES #include "MBlazeGenIntrinsics.inc" #undef GET_INTRINSIC_ATTRIBUTES @@ -104,7 +104,8 @@ Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, Type **Tys, unsigned numTy) const { assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded"); - AttrListPtr AList = getAttributes((mblazeIntrinsic::ID) IntrID); + AttrListPtr AList = getAttributes(M->getContext(), + (mblazeIntrinsic::ID) IntrID); return cast<Function>(M->getOrInsertFunction(getName(IntrID), getType(M->getContext(), IntrID), AList)); diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index 46f5207..daa76e8 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -140,7 +140,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned oi = i == 2 ? 1 : 2; - DEBUG(dbgs() << "\nFunction : " << MF.getFunction()->getName() << "\n"; + DEBUG(dbgs() << "\nFunction : " << MF.getName() << "\n"; dbgs() << "<--------->\n" << MI); int FrameIndex = MI.getOperand(i).getIndex(); diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp index 5f82f14..f180652 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -38,11 +38,12 @@ MBlazeTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS), - DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"), + DL("E-p:32:32:32-i8:8:8-i16:16:16"), InstrInfo(*this), FrameLowering(Subtarget), - TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()) { + TLInfo(*this), TSInfo(*this), + InstrItins(Subtarget.getInstrItineraryData()), + STTI(&TLInfo), VTTI(&TLInfo) { } namespace { diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h index 1647a21..a8df4e6 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -20,25 +20,26 @@ #include "MBlazeSelectionDAGInfo.h" #include "MBlazeIntrinsicInfo.h" #include "MBlazeFrameLowering.h" -#include "MBlazeELFWriterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetTransformImpl.h" namespace llvm { class formatted_raw_ostream; class MBlazeTargetMachine : public LLVMTargetMachine { MBlazeSubtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment + const DataLayout DL; // Calculates type size & alignment MBlazeInstrInfo InstrInfo; MBlazeFrameLowering FrameLowering; MBlazeTargetLowering TLInfo; MBlazeSelectionDAGInfo TSInfo; MBlazeIntrinsicInfo IntrinsicInfo; - MBlazeELFWriterInfo ELFWriterInfo; InstrItineraryData InstrItins; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: MBlazeTargetMachine(const Target &T, StringRef TT, @@ -59,8 +60,8 @@ namespace llvm { virtual const MBlazeSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual const TargetData *getTargetData() const - { return &DataLayout;} + virtual const DataLayout *getDataLayout() const + { return &DL;} virtual const MBlazeRegisterInfo *getRegisterInfo() const { 
return &InstrInfo.getRegisterInfo(); } @@ -74,9 +75,10 @@ namespace llvm { const TargetIntrinsicInfo *getIntrinsicInfo() const { return &IntrinsicInfo; } - virtual const MBlazeELFWriterInfo *getELFWriterInfo() const { - return &ELFWriterInfo; - } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const + { return &STTI; } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const + { return &VTTI; } // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp index f66ea30..899c74ee 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp @@ -13,7 +13,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ELF.h" @@ -70,7 +70,7 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, return false; Type *Ty = GV->getType()->getElementType(); - return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty)); + return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty)); } const MCSection *MBlazeTargetObjectFile:: diff --git a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp index bfd11a0..2b71d9d 100644 --- a/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp @@ -29,8 +29,8 @@ STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); namespace { class MBlazeMCCodeEmitter : public MCCodeEmitter { - MBlazeMCCodeEmitter(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT + MBlazeMCCodeEmitter(const MBlazeMCCodeEmitter &) LLVM_DELETED_FUNCTION; + void operator=(const MBlazeMCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; public: diff --git a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp index 61d7f2b..2e170f1 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp @@ -20,7 +20,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" @@ -221,3 +221,17 @@ MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } + +void +MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) + const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + // Create a frame entry for the FPW register that must be saved. 
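+  // (This hook moved here from MSP430RegisterInfo; the matching removal is
+  // in the MSP430RegisterInfo.cpp hunk below.)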
+ if (TFI->hasFP(MF)) { + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true); + (void)FrameIdx; + assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() && + "Slot for FPW register must be last in order to be found!"); + } +} diff --git a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h index b636827..cb02545 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h +++ b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h @@ -46,6 +46,7 @@ public: bool hasFP(const MachineFunction &MF) const; bool hasReservedCallFrame(const MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 5430d43..5efc6a3 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -274,8 +274,8 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue N, else if (AM.JT != -1) Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i16, 0/*AM.SymbolFlags*/); else if (AM.BlockAddr) - Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32, - true, 0/*AM.SymbolFlags*/); + Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, 0, + 0/*AM.SymbolFlags*/); else Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i16); diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp index f8b7e14..fc677ae 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -61,7 +61,7 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : TargetLowering(tm, new TargetLoweringObjectFileELF()), Subtarget(*tm.getSubtargetImpl()) { - TD = getTargetData(); + TD = getDataLayout(); // Set up the register classes. addRegisterClass(MVT::i8, &MSP430::GR8RegClass); @@ -655,7 +655,7 @@ SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true); + SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy()); return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result); } diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h index d8ad02f..991304c 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h +++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.h @@ -169,7 +169,7 @@ namespace llvm { SelectionDAG &DAG) const; const MSP430Subtarget &Subtarget; - const TargetData *TD; + const DataLayout *TD; }; } // namespace llvm diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp index aed46a2..9ae238f 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -220,20 +220,6 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i+1).ChangeToImmediate(Offset); } -void -MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) - const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - // Create a frame entry for the FPW register that must be saved. 
- if (TFI->hasFP(MF)) { - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true); - (void)FrameIdx; - assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() && - "Slot for FPW register must be last in order to be found!"); - } -} - unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h index 9ee0a03..64a43bc 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h @@ -49,8 +49,6 @@ public: void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; - // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; }; diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp index 817001d..13e37b3 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -33,10 +33,10 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, CodeGenOpt::Level OL) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS), - // FIXME: Check TargetData string. - DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), + // FIXME: Check DataLayout string. + DL("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget) { } + FrameLowering(Subtarget), STTI(&TLInfo), VTTI(&TLInfo) { } namespace { /// MSP430 Code Generator Pass Configuration Options. 
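The MSP430 hunks above show the pattern this change applies to every backend in the tree: the TargetData member becomes a DataLayout named DL, and the target machine gains ScalarTargetTransformImpl/VectorTargetTransformImpl members exposed through new virtual getters. A minimal sketch of the resulting shape, using hypothetical FooTargetMachine/FooTargetLowering names (the member names, initializer order, and getter signatures follow the hunks; all other boilerplate is elided):

    // Sketch only: the Foo* names are stand-ins, not part of this change.
    class FooTargetMachine : public LLVMTargetMachine {
      const DataLayout DL;            // was: const TargetData DataLayout;
      FooTargetLowering TLInfo;
      ScalarTargetTransformImpl STTI; // new: scalar transform/cost queries
      VectorTargetTransformImpl VTTI; // new: vector transform/cost queries
    public:
      FooTargetMachine(const Target &T, StringRef TT, StringRef CPU,
                       StringRef FS, const TargetOptions &Options,
                       Reloc::Model RM, CodeModel::Model CM,
                       CodeGenOpt::Level OL)
        : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
          DL("e-p:16:16:16-i8:8:8-n8:16"), // layout string is per target
          TLInfo(*this), STTI(&TLInfo), VTTI(&TLInfo) {}

      virtual const DataLayout *getDataLayout() const { return &DL; }
      virtual const ScalarTargetTransformInfo *
      getScalarTargetTransformInfo() const { return &STTI; }
      virtual const VectorTargetTransformInfo *
      getVectorTargetTransformInfo() const { return &VTTI; }
    };

The MBlaze and Hexagon target machines earlier in this patch end up with the same shape; MBlaze additionally drops its ELFWriterInfo member, whose class is deleted outright.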
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h index f54146b..186172e 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h +++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h @@ -21,9 +21,10 @@ #include "MSP430SelectionDAGInfo.h" #include "MSP430RegisterInfo.h" #include "MSP430Subtarget.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -31,11 +32,13 @@ namespace llvm { /// class MSP430TargetMachine : public LLVMTargetMachine { MSP430Subtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment + const DataLayout DL; // Calculates type size & alignment MSP430InstrInfo InstrInfo; MSP430TargetLowering TLInfo; MSP430SelectionDAGInfo TSInfo; MSP430FrameLowering FrameLowering; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: MSP430TargetMachine(const Target &T, StringRef TT, @@ -47,7 +50,7 @@ public: return &FrameLowering; } virtual const MSP430InstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const TargetData *getTargetData() const { return &DataLayout;} + virtual const DataLayout *getDataLayout() const { return &DL;} virtual const MSP430Subtarget *getSubtargetImpl() const { return &Subtarget; } virtual const TargetRegisterInfo *getRegisterInfo() const { @@ -61,7 +64,12 @@ public: virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const { return &TSInfo; } - + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); }; // MSP430TargetMachine. diff --git a/contrib/llvm/lib/Target/Mangler.cpp b/contrib/llvm/lib/Target/Mangler.cpp index 786a0c5..539a1f7 100644 --- a/contrib/llvm/lib/Target/Mangler.cpp +++ b/contrib/llvm/lib/Target/Mangler.cpp @@ -14,7 +14,7 @@ #include "llvm/Target/Mangler.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/raw_ostream.h" @@ -44,7 +44,7 @@ static void MangleLetter(SmallVectorImpl<char> &OutName, unsigned char C) { OutName.push_back('_'); } -/// NameNeedsEscaping - Return true if the identifier \arg Str needs quotes +/// NameNeedsEscaping - Return true if the identifier \p Str needs quotes /// for this assembler. static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) { assert(!Str.empty() && "Cannot create an empty MCSymbol"); @@ -157,7 +157,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName, /// a suffix on their name indicating the number of words of arguments they /// take. static void AddFastCallStdCallSuffix(SmallVectorImpl<char> &OutName, - const Function *F, const TargetData &TD) { + const Function *F, const DataLayout &TD) { // Calculate arguments size total. 
unsigned ArgWords = 0; for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); @@ -183,8 +183,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName, ManglerPrefixTy PrefixTy = Mangler::Default; if (GV->hasPrivateLinkage() || isImplicitlyPrivate) PrefixTy = Mangler::Private; - else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage() || - GV->hasLinkerPrivateWeakDefAutoLinkage()) + else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage()) PrefixTy = Mangler::LinkerPrivate; // If this global has a name, handle it simply. diff --git a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 58b5590..67b5248 100644 --- a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -8,53 +8,1316 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/MipsMCTargetDesc.h" +#include "MipsRegisterInfo.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; namespace { +class MipsAssemblerOptions { +public: + MipsAssemblerOptions(): + aTReg(1), reorder(true), macro(true) { + } + + unsigned getATRegNum() {return aTReg;} + bool setATReg(unsigned Reg); + + bool isReorder() {return reorder;} + void setReorder() {reorder = true;} + void setNoreorder() {reorder = false;} + + bool isMacro() {return macro;} + void setMacro() {macro = true;} + void setNomacro() {macro = false;} + +private: + unsigned aTReg; + bool reorder; + bool macro; +}; +} + +namespace { class MipsAsmParser : public MCTargetAsmParser { - bool MatchAndEmitInstruction(SMLoc IDLoc, + + enum FpFormatTy { + FP_FORMAT_NONE = -1, + FP_FORMAT_S, + FP_FORMAT_D, + FP_FORMAT_L, + FP_FORMAT_W + } FpFormat; + + MCSubtargetInfo &STI; + MCAsmParser &Parser; + MipsAssemblerOptions Options; + + +#define GET_ASSEMBLER_HEADER +#include "MipsGenAsmMatcher.inc" + + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out); + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm); bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); - bool ParseInstruction(StringRef Name, SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands); + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + bool parseMathOperation(StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); bool ParseDirective(AsmToken DirectiveID); + MipsAsmParser::OperandMatchResultTy + parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&); + + bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, + StringRef Mnemonic); + + int tryParseRegister(StringRef Mnemonic); + + bool tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + StringRef Mnemonic); + + bool needsExpansion(MCInst &Inst); + + void expandInstruction(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions); + void expandLoadImm(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> 
&Instructions); + void expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions); + void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions); + bool reportParseError(StringRef ErrorMsg); + + bool parseMemOffset(const MCExpr *&Res); + bool parseRelocOperand(const MCExpr *&Res); + + bool parseDirectiveSet(); + + bool parseSetAtDirective(); + bool parseSetNoAtDirective(); + bool parseSetMacroDirective(); + bool parseSetNoMacroDirective(); + bool parseSetReorderDirective(); + bool parseSetNoReorderDirective(); + + MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol); + + bool isMips64() const { + return (STI.getFeatureBits() & Mips::FeatureMips64) != 0; + } + + bool isFP64() const { + return (STI.getFeatureBits() & Mips::FeatureFP64Bit) != 0; + } + + int matchRegisterName(StringRef Symbol); + + int matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic); + + void setFpFormat(FpFormatTy Format) { + FpFormat = Format; + } + + void setDefaultFpFormat(); + + void setFpFormat(StringRef Format); + + FpFormatTy getFpFormat() {return FpFormat;} + + bool requestsDoubleOperand(StringRef Mnemonic); + + unsigned getReg(int RC,int RegNo); + + unsigned getATReg(); public: MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) - : MCTargetAsmParser() { + : MCTargetAsmParser(), STI(sti), Parser(parser) { + // Initialize the set of available features. + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + } + + MCAsmParser &getParser() const { return Parser; } + MCAsmLexer &getLexer() const { return Parser.getLexer(); } + +}; +} + +namespace { + +/// MipsOperand - Instances of this class represent a parsed Mips machine +/// instruction. +class MipsOperand : public MCParsedAsmOperand { + + enum KindTy { + k_CondCode, + k_CoprocNum, + k_Immediate, + k_Memory, + k_PostIndexRegister, + k_Register, + k_Token + } Kind; + + MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + + union { + struct { + const char *Data; + unsigned Length; + } Tok; + + struct { + unsigned RegNum; + } Reg; + + struct { + const MCExpr *Val; + } Imm; + + struct { + unsigned Base; + const MCExpr *Off; + } Mem; + }; + + SMLoc StartLoc, EndLoc; + +public: + void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getReg())); + } + + void addExpr(MCInst &Inst, const MCExpr *Expr) const{ + // Add as immediate when possible. Null MCExpr = 0. 
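+    // Three cases: a null expression becomes immediate 0, a constant
+    // expression folds to its integer value, and anything else (symbol
+    // references, relocation expressions) stays an MCExpr so a fixup
+    // can resolve it later.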
+ if (Expr == 0) + Inst.addOperand(MCOperand::CreateImm(0)); + else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCExpr *Expr = getImm(); + addExpr(Inst,Expr); + } + + void addMemOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::CreateReg(getMemBase())); + + const MCExpr *Expr = getMemOff(); + addExpr(Inst,Expr); + } + + bool isReg() const { return Kind == k_Register; } + bool isImm() const { return Kind == k_Immediate; } + bool isToken() const { return Kind == k_Token; } + bool isMem() const { return Kind == k_Memory; } + + StringRef getToken() const { + assert(Kind == k_Token && "Invalid access!"); + return StringRef(Tok.Data, Tok.Length); + } + + unsigned getReg() const { + assert((Kind == k_Register) && "Invalid access!"); + return Reg.RegNum; + } + + const MCExpr *getImm() const { + assert((Kind == k_Immediate) && "Invalid access!"); + return Imm.Val; + } + + unsigned getMemBase() const { + assert((Kind == k_Memory) && "Invalid access!"); + return Mem.Base; + } + + const MCExpr *getMemOff() const { + assert((Kind == k_Memory) && "Invalid access!"); + return Mem.Off; + } + + static MipsOperand *CreateToken(StringRef Str, SMLoc S) { + MipsOperand *Op = new MipsOperand(k_Token); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = S; + return Op; } + static MipsOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { + MipsOperand *Op = new MipsOperand(k_Register); + Op->Reg.RegNum = RegNum; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static MipsOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { + MipsOperand *Op = new MipsOperand(k_Immediate); + Op->Imm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static MipsOperand *CreateMem(unsigned Base, const MCExpr *Off, + SMLoc S, SMLoc E) { + MipsOperand *Op = new MipsOperand(k_Memory); + Op->Mem.Base = Base; + Op->Mem.Off = Off; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + /// getStartLoc - Get the location of the first token of this operand. + SMLoc getStartLoc() const { return StartLoc; } + /// getEndLoc - Get the location of the last token of this operand. 
+ SMLoc getEndLoc() const { return EndLoc; } + + virtual void print(raw_ostream &OS) const { + llvm_unreachable("unimplemented!"); + } }; } +bool MipsAsmParser::needsExpansion(MCInst &Inst) { + + switch(Inst.getOpcode()) { + case Mips::LoadImm32Reg: + case Mips::LoadAddr32Imm: + case Mips::LoadAddr32Reg: + return true; + default: + return false; + } +} + +void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions){ + switch(Inst.getOpcode()) { + case Mips::LoadImm32Reg: + return expandLoadImm(Inst, IDLoc, Instructions); + case Mips::LoadAddr32Imm: + return expandLoadAddressImm(Inst,IDLoc,Instructions); + case Mips::LoadAddr32Reg: + return expandLoadAddressReg(Inst,IDLoc,Instructions); + } +} + +void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions){ + MCInst tmpInst; + const MCOperand &ImmOp = Inst.getOperand(1); + assert(ImmOp.isImm() && "expected immediate operand kind"); + const MCOperand &RegOp = Inst.getOperand(0); + assert(RegOp.isReg() && "expected register operand kind"); + + int ImmValue = ImmOp.getImm(); + tmpInst.setLoc(IDLoc); + if ( 0 <= ImmValue && ImmValue <= 65535) { + // for 0 <= j <= 65535. + // li d,j => ori d,$zero,j + tmpInst.setOpcode(isMips64() ? Mips::ORi64 : Mips::ORi); + tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); + tmpInst.addOperand( + MCOperand::CreateReg(isMips64() ? Mips::ZERO_64 : Mips::ZERO)); + tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); + Instructions.push_back(tmpInst); + } else if ( ImmValue < 0 && ImmValue >= -32768) { + // for -32768 <= j < 0. + // li d,j => addiu d,$zero,j + tmpInst.setOpcode(Mips::ADDiu); //TODO:no ADDiu64 in td files? + tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); + tmpInst.addOperand( + MCOperand::CreateReg(isMips64() ? Mips::ZERO_64 : Mips::ZERO)); + tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); + Instructions.push_back(tmpInst); + } else { + // for any other value of j that is representable as a 32-bit integer. + // li d,j => lui d,hi16(j) + // ori d,d,lo16(j) + tmpInst.setOpcode(isMips64() ? Mips::LUi64 : Mips::LUi); + tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); + tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); + Instructions.push_back(tmpInst); + tmpInst.clear(); + tmpInst.setOpcode(isMips64() ? Mips::ORi64 : Mips::ORi); + tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); + tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); + tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); + tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); + } +} + +void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions){ + MCInst tmpInst; + const MCOperand &ImmOp = Inst.getOperand(2); + assert(ImmOp.isImm() && "expected immediate operand kind"); + const MCOperand &SrcRegOp = Inst.getOperand(1); + assert(SrcRegOp.isReg() && "expected register operand kind"); + const MCOperand &DstRegOp = Inst.getOperand(0); + assert(DstRegOp.isReg() && "expected register operand kind"); + int ImmValue = ImmOp.getImm(); + if ( -32768 <= ImmValue && ImmValue <= 65535) { + //for -32768 <= j <= 65535. + //la d,j(s) => addiu d,s,j + tmpInst.setOpcode(Mips::ADDiu); //TODO:no ADDiu64 in td files? 
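+    // Operands are added in dst, src, imm order; e.g. "la $4, 8($5)"
+    // becomes "addiu $4, $5, 8".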
+ tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); + tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg())); + tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); + Instructions.push_back(tmpInst); + } else { + //for any other value of j that is representable as a 32-bit integer. + //la d,j(s) => lui d,hi16(j) + // ori d,d,lo16(j) + // addu d,d,s + tmpInst.setOpcode(isMips64()?Mips::LUi64:Mips::LUi); + tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); + tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); + Instructions.push_back(tmpInst); + tmpInst.clear(); + tmpInst.setOpcode(isMips64()?Mips::ORi64:Mips::ORi); + tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); + tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); + tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); + Instructions.push_back(tmpInst); + tmpInst.clear(); + tmpInst.setOpcode(Mips::ADDu); + tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); + tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); + tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg())); + Instructions.push_back(tmpInst); + } +} + +void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions){ + MCInst tmpInst; + const MCOperand &ImmOp = Inst.getOperand(1); + assert(ImmOp.isImm() && "expected immediate operand kind"); + const MCOperand &RegOp = Inst.getOperand(0); + assert(RegOp.isReg() && "expected register operand kind"); + int ImmValue = ImmOp.getImm(); + if ( -32768 <= ImmValue && ImmValue <= 65535) { + //for -32768 <= j <= 65535. + //la d,j => addiu d,$zero,j + tmpInst.setOpcode(Mips::ADDiu); + tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); + tmpInst.addOperand( + MCOperand::CreateReg(isMips64()?Mips::ZERO_64:Mips::ZERO)); + tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); + Instructions.push_back(tmpInst); + } else { + //for any other value of j that is representable as a 32-bit integer. 
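+    // For example, "la $4, 0x12345" becomes "lui $4, 1" followed by
+    // "ori $4, $4, 0x2345", since 0x12345 does not fit in 16 bits.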
+ //la d,j => lui d,hi16(j) + // ori d,d,lo16(j) + tmpInst.setOpcode(isMips64()?Mips::LUi64:Mips::LUi); + tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); + tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); + Instructions.push_back(tmpInst); + tmpInst.clear(); + tmpInst.setOpcode(isMips64()?Mips::ORi64:Mips::ORi); + tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); + tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); + tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); + Instructions.push_back(tmpInst); + } +} + bool MipsAsmParser:: -MatchAndEmitInstruction(SMLoc IDLoc, +MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out) { + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { + MCInst Inst; + unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, + MatchingInlineAsm); + + switch (MatchResult) { + default: break; + case Match_Success: { + if (needsExpansion(Inst)) { + SmallVector<MCInst, 4> Instructions; + expandInstruction(Inst, IDLoc, Instructions); + for(unsigned i =0; i < Instructions.size(); i++){ + Out.EmitInstruction(Instructions[i]); + } + } else { + Inst.setLoc(IDLoc); + Out.EmitInstruction(Inst); + } + return false; + } + case Match_MissingFeature: + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0U) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = ((MipsOperand*)Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + } + + return Error(ErrorLoc, "invalid operand for instruction"); + } + case Match_MnemonicFail: + return Error(IDLoc, "invalid instruction"); + } + return true; +} + +int MipsAsmParser::matchRegisterName(StringRef Name) { + + int CC; + if (!isMips64()) + CC = StringSwitch<unsigned>(Name) + .Case("zero", Mips::ZERO) + .Case("a0", Mips::A0) + .Case("a1", Mips::A1) + .Case("a2", Mips::A2) + .Case("a3", Mips::A3) + .Case("v0", Mips::V0) + .Case("v1", Mips::V1) + .Case("s0", Mips::S0) + .Case("s1", Mips::S1) + .Case("s2", Mips::S2) + .Case("s3", Mips::S3) + .Case("s4", Mips::S4) + .Case("s5", Mips::S5) + .Case("s6", Mips::S6) + .Case("s7", Mips::S7) + .Case("k0", Mips::K0) + .Case("k1", Mips::K1) + .Case("sp", Mips::SP) + .Case("fp", Mips::FP) + .Case("gp", Mips::GP) + .Case("ra", Mips::RA) + .Case("t0", Mips::T0) + .Case("t1", Mips::T1) + .Case("t2", Mips::T2) + .Case("t3", Mips::T3) + .Case("t4", Mips::T4) + .Case("t5", Mips::T5) + .Case("t6", Mips::T6) + .Case("t7", Mips::T7) + .Case("t8", Mips::T8) + .Case("t9", Mips::T9) + .Case("at", Mips::AT) + .Case("fcc0", Mips::FCC0) + .Default(-1); + else + CC = StringSwitch<unsigned>(Name) + .Case("zero", Mips::ZERO_64) + .Case("at", Mips::AT_64) + .Case("v0", Mips::V0_64) + .Case("v1", Mips::V1_64) + .Case("a0", Mips::A0_64) + .Case("a1", Mips::A1_64) + .Case("a2", Mips::A2_64) + .Case("a3", Mips::A3_64) + .Case("a4", Mips::T0_64) + .Case("a5", Mips::T1_64) + .Case("a6", Mips::T2_64) + .Case("a7", Mips::T3_64) + .Case("t4", Mips::T4_64) + .Case("t5", Mips::T5_64) + .Case("t6", Mips::T6_64) + .Case("t7", Mips::T7_64) + .Case("s0", Mips::S0_64) + .Case("s1", Mips::S1_64) + .Case("s2", Mips::S2_64) + .Case("s3", Mips::S3_64) + .Case("s4", Mips::S4_64) + .Case("s5", Mips::S5_64) + .Case("s6", Mips::S6_64) + .Case("s7", Mips::S7_64) + .Case("t8", Mips::T8_64) + 
.Case("t9", Mips::T9_64) + .Case("kt0", Mips::K0_64) + .Case("kt1", Mips::K1_64) + .Case("gp", Mips::GP_64) + .Case("sp", Mips::SP_64) + .Case("fp", Mips::FP_64) + .Case("s8", Mips::FP_64) + .Case("ra", Mips::RA_64) + .Default(-1); + + if (CC != -1) + return CC; + + if (Name[0] == 'f') { + StringRef NumString = Name.substr(1); + unsigned IntVal; + if( NumString.getAsInteger(10, IntVal)) + return -1; // not integer + if (IntVal > 31) + return -1; + + FpFormatTy Format = getFpFormat(); + + if (Format == FP_FORMAT_S || Format == FP_FORMAT_W) + return getReg(Mips::FGR32RegClassID, IntVal); + if (Format == FP_FORMAT_D) { + if(isFP64()) { + return getReg(Mips::FGR64RegClassID, IntVal); + } + // only even numbers available as register pairs + if (( IntVal > 31) || (IntVal%2 != 0)) + return -1; + return getReg(Mips::AFGR64RegClassID, IntVal/2); + } + } + + return -1; +} +void MipsAsmParser::setDefaultFpFormat() { + + if (isMips64() || isFP64()) + FpFormat = FP_FORMAT_D; + else + FpFormat = FP_FORMAT_S; +} + +bool MipsAsmParser::requestsDoubleOperand(StringRef Mnemonic){ + + bool IsDouble = StringSwitch<bool>(Mnemonic.lower()) + .Case("ldxc1", true) + .Case("ldc1", true) + .Case("sdxc1", true) + .Case("sdc1", true) + .Default(false); + + return IsDouble; +} +void MipsAsmParser::setFpFormat(StringRef Format) { + + FpFormat = StringSwitch<FpFormatTy>(Format.lower()) + .Case(".s", FP_FORMAT_S) + .Case(".d", FP_FORMAT_D) + .Case(".l", FP_FORMAT_L) + .Case(".w", FP_FORMAT_W) + .Default(FP_FORMAT_NONE); +} + +bool MipsAssemblerOptions::setATReg(unsigned Reg) { + if (Reg > 31) + return false; + + aTReg = Reg; return true; } +unsigned MipsAsmParser::getATReg() { + unsigned Reg = Options.getATRegNum(); + if (isMips64()) + return getReg(Mips::CPU64RegsRegClassID,Reg); + + return getReg(Mips::CPURegsRegClassID,Reg); +} + +unsigned MipsAsmParser::getReg(int RC,int RegNo) { + return *(getContext().getRegisterInfo().getRegClass(RC).begin() + RegNo); +} + +int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic) { + + if (Mnemonic.lower() == "rdhwr") { + // at the moment only hwreg29 is supported + if (RegNum != 29) + return -1; + return Mips::HWR29; + } + + if (RegNum > 31) + return -1; + + // MIPS64 registers are numbered 1 after the 32-bit equivalents + return getReg(Mips::CPURegsRegClassID, RegNum) + isMips64(); +} + +int MipsAsmParser::tryParseRegister(StringRef Mnemonic) { + const AsmToken &Tok = Parser.getTok(); + int RegNum = -1; + + if (Tok.is(AsmToken::Identifier)) { + std::string lowerCase = Tok.getString().lower(); + RegNum = matchRegisterName(lowerCase); + } else if (Tok.is(AsmToken::Integer)) + RegNum = matchRegisterByNumber(static_cast<unsigned>(Tok.getIntVal()), + Mnemonic.lower()); + else + return RegNum; //error + // 64 bit div operations require Mips::ZERO instead of MIPS::ZERO_64 + if (isMips64() && RegNum == Mips::ZERO_64) { + if (Mnemonic.find("ddiv") != StringRef::npos) + RegNum = Mips::ZERO; + } + return RegNum; +} + bool MipsAsmParser:: -ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { + tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + StringRef Mnemonic){ + + SMLoc S = Parser.getTok().getLoc(); + int RegNo = -1; + + // FIXME: we should make a more generic method for CCR + if ((Mnemonic == "cfc1" || Mnemonic == "ctc1") + && Operands.size() == 2 && Parser.getTok().is(AsmToken::Integer)){ + RegNo = Parser.getTok().getIntVal(); // get the int value + // at the moment only fcc0 is supported + if (RegNo == 0) + RegNo = Mips::FCC0; + 
} else + RegNo = tryParseRegister(Mnemonic); + if (RegNo == -1) + return true; + + Operands.push_back(MipsOperand::CreateReg(RegNo, S, + Parser.getTok().getLoc())); + Parser.Lex(); // Eat register token. + return false; +} + +bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, + StringRef Mnemonic) { + // Check if the current operand has a custom associated parser, if so, try to + // custom parse the operand, or fallback to the general approach. + OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + if (ResTy == MatchOperand_Success) + return false; + // If there wasn't a custom match, try the generic matcher below. Otherwise, + // there was a match, but an error occurred, in which case, just return that + // the operand parsing failed. + if (ResTy == MatchOperand_ParseFail) + return true; + + switch (getLexer().getKind()) { + default: + Error(Parser.getTok().getLoc(), "unexpected token in operand"); + return true; + case AsmToken::Dollar: { + // parse register + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat dollar token. + // parse register operand + if (!tryParseRegisterOperand(Operands, Mnemonic)) { + if (getLexer().is(AsmToken::LParen)) { + // check if it is indexed addressing operand + Operands.push_back(MipsOperand::CreateToken("(", S)); + Parser.Lex(); // eat parenthesis + if (getLexer().isNot(AsmToken::Dollar)) + return true; + + Parser.Lex(); // eat dollar + if (tryParseRegisterOperand(Operands, Mnemonic)) + return true; + + if (!getLexer().is(AsmToken::RParen)) + return true; + + S = Parser.getTok().getLoc(); + Operands.push_back(MipsOperand::CreateToken(")", S)); + Parser.Lex(); + } + return false; + } + // maybe it is a symbol reference + StringRef Identifier; + if (Parser.ParseIdentifier(Identifier)) + return true; + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + MCSymbol *Sym = getContext().GetOrCreateSymbol("$" + Identifier); + + // Otherwise create a symbol ref. 
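+    // A '$'-prefixed name that did not match any register (e.g. a
+    // local label such as "$tmp1") is kept, '$' included, as a symbol.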
+ const MCExpr *Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, + getContext()); + + Operands.push_back(MipsOperand::CreateImm(Res, S, E)); + return false; + } + case AsmToken::Identifier: + case AsmToken::LParen: + case AsmToken::Minus: + case AsmToken::Plus: + case AsmToken::Integer: + case AsmToken::String: { + // quoted label names + const MCExpr *IdVal; + SMLoc S = Parser.getTok().getLoc(); + if (getParser().ParseExpression(IdVal)) + return true; + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(MipsOperand::CreateImm(IdVal, S, E)); + return false; + } + case AsmToken::Percent: { + // it is a symbol reference or constant expression + const MCExpr *IdVal; + SMLoc S = Parser.getTok().getLoc(); // start location of the operand + if (parseRelocOperand(IdVal)) + return true; + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + Operands.push_back(MipsOperand::CreateImm(IdVal, S, E)); + return false; + } // case AsmToken::Percent + } // switch(getLexer().getKind()) + return true; +} + +bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) { + + Parser.Lex(); // eat % token + const AsmToken &Tok = Parser.getTok(); // get next token, operation + if (Tok.isNot(AsmToken::Identifier)) + return true; + + std::string Str = Tok.getIdentifier().str(); + + Parser.Lex(); // eat identifier + // now make expression from the rest of the operand + const MCExpr *IdVal; + SMLoc EndLoc; + + if (getLexer().getKind() == AsmToken::LParen) { + while (1) { + Parser.Lex(); // eat '(' token + if (getLexer().getKind() == AsmToken::Percent) { + Parser.Lex(); // eat % token + const AsmToken &nextTok = Parser.getTok(); + if (nextTok.isNot(AsmToken::Identifier)) + return true; + Str += "(%"; + Str += nextTok.getIdentifier(); + Parser.Lex(); // eat identifier + if (getLexer().getKind() != AsmToken::LParen) + return true; + } else + break; + } + if (getParser().ParseParenExpression(IdVal,EndLoc)) + return true; + + while (getLexer().getKind() == AsmToken::RParen) + Parser.Lex(); // eat ')' token + + } else + return true; // parenthesis must follow reloc operand + + // Check the type of the expression + if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal)) { + // it's a constant, evaluate lo or hi value + int Val = MCE->getValue(); + if (Str == "lo") { + Val = Val & 0xffff; + } else if (Str == "hi") { + Val = (Val & 0xffff0000) >> 16; + } + Res = MCConstantExpr::Create(Val, getContext()); + return false; + } + + if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(IdVal)) { + // it's a symbol, create symbolic expression from symbol + StringRef Symbol = MSRE->getSymbol().getName(); + MCSymbolRefExpr::VariantKind VK = getVariantKind(Str); + Res = MCSymbolRefExpr::Create(Symbol,VK,getContext()); + return false; + } return true; } +bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { + + StartLoc = Parser.getTok().getLoc(); + RegNo = tryParseRegister(""); + EndLoc = Parser.getTok().getLoc(); + return (RegNo == (unsigned)-1); +} + +bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) { + + SMLoc S; + + switch(getLexer().getKind()) { + default: + return true; + case AsmToken::Integer: + case AsmToken::Minus: + case AsmToken::Plus: + return (getParser().ParseExpression(Res)); + case AsmToken::Percent: + return parseRelocOperand(Res); + case AsmToken::LParen: + return false; // it's probably assuming 0 + } + return true; +} + +MipsAsmParser::OperandMatchResultTy 
MipsAsmParser::parseMemOperand( + SmallVectorImpl<MCParsedAsmOperand*>&Operands) { + + const MCExpr *IdVal = 0; + SMLoc S; + // first operand is the offset + S = Parser.getTok().getLoc(); + + if (parseMemOffset(IdVal)) + return MatchOperand_ParseFail; + + const AsmToken &Tok = Parser.getTok(); // get next token + if (Tok.isNot(AsmToken::LParen)) { + MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]); + if (Mnemonic->getToken() == "la") { + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer()-1); + Operands.push_back(MipsOperand::CreateImm(IdVal, S, E)); + return MatchOperand_Success; + } + Error(Parser.getTok().getLoc(), "'(' expected"); + return MatchOperand_ParseFail; + } + + Parser.Lex(); // Eat '(' token. + + const AsmToken &Tok1 = Parser.getTok(); // get next token + if (Tok1.is(AsmToken::Dollar)) { + Parser.Lex(); // Eat '$' token. + if (tryParseRegisterOperand(Operands,"")) { + Error(Parser.getTok().getLoc(), "unexpected token in operand"); + return MatchOperand_ParseFail; + } + + } else { + Error(Parser.getTok().getLoc(), "unexpected token in operand"); + return MatchOperand_ParseFail; + } + + const AsmToken &Tok2 = Parser.getTok(); // get next token + if (Tok2.isNot(AsmToken::RParen)) { + Error(Parser.getTok().getLoc(), "')' expected"); + return MatchOperand_ParseFail; + } + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + Parser.Lex(); // Eat ')' token. + + if (IdVal == 0) + IdVal = MCConstantExpr::Create(0, getContext()); + + // now replace register operand with the mem operand + MipsOperand* op = static_cast<MipsOperand*>(Operands.back()); + int RegNo = op->getReg(); + // remove register from operands + Operands.pop_back(); + // and add memory operand + Operands.push_back(MipsOperand::CreateMem(RegNo, IdVal, S, E)); + delete op; + return MatchOperand_Success; +} + +MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) { + + MCSymbolRefExpr::VariantKind VK + = StringSwitch<MCSymbolRefExpr::VariantKind>(Symbol) + .Case("hi", MCSymbolRefExpr::VK_Mips_ABS_HI) + .Case("lo", MCSymbolRefExpr::VK_Mips_ABS_LO) + .Case("gp_rel", MCSymbolRefExpr::VK_Mips_GPREL) + .Case("call16", MCSymbolRefExpr::VK_Mips_GOT_CALL) + .Case("got", MCSymbolRefExpr::VK_Mips_GOT) + .Case("tlsgd", MCSymbolRefExpr::VK_Mips_TLSGD) + .Case("tlsldm", MCSymbolRefExpr::VK_Mips_TLSLDM) + .Case("dtprel_hi", MCSymbolRefExpr::VK_Mips_DTPREL_HI) + .Case("dtprel_lo", MCSymbolRefExpr::VK_Mips_DTPREL_LO) + .Case("gottprel", MCSymbolRefExpr::VK_Mips_GOTTPREL) + .Case("tprel_hi", MCSymbolRefExpr::VK_Mips_TPREL_HI) + .Case("tprel_lo", MCSymbolRefExpr::VK_Mips_TPREL_LO) + .Case("got_disp", MCSymbolRefExpr::VK_Mips_GOT_DISP) + .Case("got_page", MCSymbolRefExpr::VK_Mips_GOT_PAGE) + .Case("got_ofst", MCSymbolRefExpr::VK_Mips_GOT_OFST) + .Case("hi(%neg(%gp_rel", MCSymbolRefExpr::VK_Mips_GPOFF_HI) + .Case("lo(%neg(%gp_rel", MCSymbolRefExpr::VK_Mips_GPOFF_LO) + .Default(MCSymbolRefExpr::VK_None); + + return VK; +} + +static int ConvertCcString(StringRef CondString) { + int CC = StringSwitch<unsigned>(CondString) + .Case(".f", 0) + .Case(".un", 1) + .Case(".eq", 2) + .Case(".ueq", 3) + .Case(".olt", 4) + .Case(".ult", 5) + .Case(".ole", 6) + .Case(".ule", 7) + .Case(".sf", 8) + .Case(".ngle", 9) + .Case(".seq", 10) + .Case(".ngl", 11) + .Case(".lt", 12) + .Case(".nge", 13) + .Case(".le", 14) + .Case(".ngt", 15) + .Default(-1); + + return CC; +} + bool MipsAsmParser:: -ParseInstruction(StringRef Name, SMLoc NameLoc, +parseMathOperation(StringRef Name, 
SMLoc NameLoc,
+                   SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // split the format
+  size_t Start = Name.find('.'), Next = Name.rfind('.');
+  StringRef Format1 = Name.slice(Start, Next);
+  // and add the first format to the operands
+  Operands.push_back(MipsOperand::CreateToken(Format1, NameLoc));
+  // now for the second format
+  StringRef Format2 = Name.slice(Next, StringRef::npos);
+  Operands.push_back(MipsOperand::CreateToken(Format2, NameLoc));
+
+  // set the format for the first register
+  setFpFormat(Format1);
+
+  // Read the remaining operands.
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    // Read the first operand.
+    if (ParseOperand(Operands, Name)) {
+      SMLoc Loc = getLexer().getLoc();
+      Parser.EatToEndOfStatement();
+      return Error(Loc, "unexpected token in argument list");
+    }
+
+    if (getLexer().isNot(AsmToken::Comma)) {
+      SMLoc Loc = getLexer().getLoc();
+      Parser.EatToEndOfStatement();
+      return Error(Loc, "unexpected token in argument list");
+
+    }
+    Parser.Lex();  // Eat the comma.
+
+    // set the format for the second register
+    setFpFormat(Format2);
+
+    // Parse and remember the operand.
+    if (ParseOperand(Operands, Name)) {
+      SMLoc Loc = getLexer().getLoc();
+      Parser.EatToEndOfStatement();
+      return Error(Loc, "unexpected token in argument list");
+    }
+  }
+
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    SMLoc Loc = getLexer().getLoc();
+    Parser.EatToEndOfStatement();
+    return Error(Loc, "unexpected token in argument list");
+  }
+
+  Parser.Lex(); // Consume the EndOfStatement
+  return false;
+}
+
+bool MipsAsmParser::
+ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
                  SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // floating point instructions: should the register be treated as double?
+  if (requestsDoubleOperand(Name)) {
+    setFpFormat(FP_FORMAT_D);
+    Operands.push_back(MipsOperand::CreateToken(Name, NameLoc));
+  }
+  else {
+    setDefaultFpFormat();
+    // Create the leading tokens for the mnemonic, split by '.' characters.
+    size_t Start = 0, Next = Name.find('.');
+    StringRef Mnemonic = Name.slice(Start, Next);
+
+    Operands.push_back(MipsOperand::CreateToken(Mnemonic, NameLoc));
+
+    if (Next != StringRef::npos) {
+      // there is a format token in the mnemonic
+      // StringRef Rest = Name.slice(Next, StringRef::npos);
+      size_t Dot = Name.find('.', Next+1);
+      StringRef Format = Name.slice(Next, Dot);
+      if (Dot == StringRef::npos) // only one '.' in the string, it's a format
+        Operands.push_back(MipsOperand::CreateToken(Format, NameLoc));
+      else {
+        if (Name.startswith("c.")){
+          // floating point compare: add '.' and an immediate for the cc
+          Operands.push_back(MipsOperand::CreateToken(".", NameLoc));
+          int Cc = ConvertCcString(Format);
+          if (Cc == -1) {
+            return Error(NameLoc, "Invalid conditional code");
+          }
+          SMLoc E = SMLoc::getFromPointer(
+              Parser.getTok().getLoc().getPointer() -1 );
+          Operands.push_back(MipsOperand::CreateImm(
+              MCConstantExpr::Create(Cc, getContext()), NameLoc, E));
+        } else {
+          // trunc, ceil, floor ...
+          return parseMathOperation(Name, NameLoc, Operands);
+        }
+
+        // the rest is a format
+        Format = Name.slice(Dot, StringRef::npos);
+        Operands.push_back(MipsOperand::CreateToken(Format, NameLoc));
+      }
+
+      setFpFormat(Format);
+    }
+  }
+
+  // Read the remaining operands.
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    // Read the first operand.
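+    // By now the mnemonic has been split into leading tokens; e.g.
+    // "c.eq.d" has produced the tokens "c", ".", the condition-code
+    // immediate (2 for .eq) and the format token ".d". Only register
+    // and immediate operands remain on the line.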
+ if (ParseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } + + while (getLexer().is(AsmToken::Comma) ) { + Parser.Lex(); // Eat the comma. + + // Parse and remember the operand. + if (ParseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } + } + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } + + Parser.Lex(); // Consume the EndOfStatement + return false; +} + +bool MipsAsmParser::reportParseError(StringRef ErrorMsg) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, ErrorMsg); +} + +bool MipsAsmParser::parseSetNoAtDirective() { + // line should look like: + // .set noat + // set at reg to 0 + Options.setATReg(0); + // eat noat + Parser.Lex(); + // if this is not the end of the statement, report error + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token in statement"); + return false; + } + Parser.Lex(); // Consume the EndOfStatement + return false; +} +bool MipsAsmParser::parseSetAtDirective() { + // line can be + // .set at - defaults to $1 + // or .set at=$reg + getParser().Lex(); + if (getLexer().is(AsmToken::EndOfStatement)) { + Options.setATReg(1); + Parser.Lex(); // Consume the EndOfStatement + return false; + } else if (getLexer().is(AsmToken::Equal)) { + getParser().Lex(); //eat '=' + if (getLexer().isNot(AsmToken::Dollar)) { + reportParseError("unexpected token in statement"); + return false; + } + Parser.Lex(); // eat '$' + if (getLexer().isNot(AsmToken::Integer)) { + reportParseError("unexpected token in statement"); + return false; + } + const AsmToken &Reg = Parser.getTok(); + if (!Options.setATReg(Reg.getIntVal())) { + reportParseError("unexpected token in statement"); + return false; + } + getParser().Lex(); //eat reg + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token in statement"); + return false; + } + Parser.Lex(); // Consume the EndOfStatement + return false; + } else { + reportParseError("unexpected token in statement"); + return false; + } +} + +bool MipsAsmParser::parseSetReorderDirective() { + Parser.Lex(); + // if this is not the end of the statement, report error + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token in statement"); + return false; + } + Options.setReorder(); + Parser.Lex(); // Consume the EndOfStatement + return false; +} + +bool MipsAsmParser::parseSetNoReorderDirective() { + Parser.Lex(); + // if this is not the end of the statement, report error + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token in statement"); + return false; + } + Options.setNoreorder(); + Parser.Lex(); // Consume the EndOfStatement + return false; +} + +bool MipsAsmParser::parseSetMacroDirective() { + Parser.Lex(); + // if this is not the end of the statement, report error + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token in statement"); + return false; + } + Options.setMacro(); + Parser.Lex(); // Consume the EndOfStatement + return false; +} + +bool MipsAsmParser::parseSetNoMacroDirective() { + Parser.Lex(); + // if this is not the end of the statement, report error + if 
(getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("`noreorder' must be set before `nomacro'"); + return false; + } + if (Options.isReorder()) { + reportParseError("`noreorder' must be set before `nomacro'"); + return false; + } + Options.setNomacro(); + Parser.Lex(); // Consume the EndOfStatement + return false; +} +bool MipsAsmParser::parseDirectiveSet() { + + // get next token + const AsmToken &Tok = Parser.getTok(); + + if (Tok.getString() == "noat") { + return parseSetNoAtDirective(); + } else if (Tok.getString() == "at") { + return parseSetAtDirective(); + } else if (Tok.getString() == "reorder") { + return parseSetReorderDirective(); + } else if (Tok.getString() == "noreorder") { + return parseSetNoReorderDirective(); + } else if (Tok.getString() == "macro") { + return parseSetMacroDirective(); + } else if (Tok.getString() == "nomacro") { + return parseSetNoMacroDirective(); + } else if (Tok.getString() == "nomips16") { + // ignore this directive for now + Parser.EatToEndOfStatement(); + return false; + } else if (Tok.getString() == "nomicromips") { + // ignore this directive for now + Parser.EatToEndOfStatement(); + return false; + } return true; } -bool MipsAsmParser:: -ParseDirective(AsmToken DirectiveID) { +bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { + + if (DirectiveID.getString() == ".ent") { + // ignore this directive for now + Parser.Lex(); + return false; + } + + if (DirectiveID.getString() == ".end") { + // ignore this directive for now + Parser.Lex(); + return false; + } + + if (DirectiveID.getString() == ".frame") { + // ignore this directive for now + Parser.EatToEndOfStatement(); + return false; + } + + if (DirectiveID.getString() == ".set") { + return parseDirectiveSet(); + } + + if (DirectiveID.getString() == ".fmask") { + // ignore this directive for now + Parser.EatToEndOfStatement(); + return false; + } + + if (DirectiveID.getString() == ".mask") { + // ignore this directive for now + Parser.EatToEndOfStatement(); + return false; + } + + if (DirectiveID.getString() == ".gpword") { + // ignore this directive for now + Parser.EatToEndOfStatement(); + return false; + } + return true; } @@ -64,3 +1327,7 @@ extern "C" void LLVMInitializeMipsAsmParser() { RegisterMCAsmParser<MipsAsmParser> A(TheMips64Target); RegisterMCAsmParser<MipsAsmParser> B(TheMips64elTarget); } + +#define GET_REGISTER_MATCHER +#define GET_MATCHER_IMPLEMENTATION +#include "MipsGenAsmMatcher.inc" diff --git a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index aa57472..82dbcc5 100644 --- a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -108,6 +108,11 @@ static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDSPRegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -138,6 +143,11 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, uint64_t Address, @@ -346,6 +356,13 @@ static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst, return 
MCDisassembler::Success; } +static DecodeStatus DecodeDSPRegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + return DecodeCPURegsRegisterClass(Inst, RegNo, Address, Decoder); +} + static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -463,6 +480,18 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo >= 4) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, Mips::ACRegsRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, uint64_t Address, diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 790bed2..9a35bb6 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -92,7 +92,7 @@ public: MCELFObjectTargetWriter::getOSABI(OSType), IsLittle, Is64Bit); } - /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided + /// ApplyFixup - Apply the \p Value for given \p Fixup into the provided /// data fragment, at the offset specified by the fixup and following the /// fixup kind as appropriate. void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, @@ -217,7 +217,7 @@ public: /// /// \param Inst - The instruction to relax, which may be the same /// as the output. - /// \parm Res [output] - On return, the relaxed instruction. + /// \param [out] Res On return, the relaxed instruction. void relaxInstruction(const MCInst &Inst, MCInst &Res) const { } diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h index 234455e..233214b 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h @@ -122,14 +122,16 @@ inline static unsigned getMipsRegisterNumbering(unsigned RegEnum) { switch (RegEnum) { case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64: - case Mips::D0: + case Mips::D0: case Mips::FCC0: case Mips::AC0: return 0; case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64: + case Mips::AC1: return 1; case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64: - case Mips::D1: + case Mips::D1: case Mips::AC2: return 2; case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64: + case Mips::AC3: return 3; case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64: case Mips::D2: diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsDirectObjLower.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsDirectObjLower.cpp new file mode 100644 index 0000000..15c4282 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsDirectObjLower.cpp @@ -0,0 +1,81 @@ +//===-- MipsDirectObjLower.cpp - Mips LLVM direct object lowering -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower Mips MCInst records that are normally
+// left to the assembler to lower such as large shifts.
+//
+//===----------------------------------------------------------------------===//
+#include "MipsInstrInfo.h"
+#include "MCTargetDesc/MipsDirectObjLower.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+// If the D<shift> instruction has a shift amount that is greater
+// than 31 (the check is done below), lower it to a D<shift>32 instruction.
+void Mips::LowerLargeShift(MCInst& Inst) {
+
+  assert(Inst.getNumOperands() == 3 && "Invalid no. of operands for shift!");
+  assert(Inst.getOperand(2).isImm());
+
+  int64_t Shift = Inst.getOperand(2).getImm();
+  if (Shift <= 31)
+    return; // Do nothing
+  Shift -= 32;
+
+  // Encode the remaining shift amount (sa - 32).
+  Inst.getOperand(2).setImm(Shift);
+
+  switch (Inst.getOpcode()) {
+  default:
+    // The caller's opcode list is out of sync with this switch.
+    llvm_unreachable("Unexpected shift instruction");
+  case Mips::DSLL:
+    Inst.setOpcode(Mips::DSLL32);
+    return;
+  case Mips::DSRL:
+    Inst.setOpcode(Mips::DSRL32);
+    return;
+  case Mips::DSRA:
+    Inst.setOpcode(Mips::DSRA32);
+    return;
+  }
+}
+
+// Pick a DEXT or DINS instruction variant based on the pos and size operands
+void Mips::LowerDextDins(MCInst& InstIn) {
+  int Opcode = InstIn.getOpcode();
+
+  if (Opcode == Mips::DEXT)
+    assert(InstIn.getNumOperands() == 4 &&
+           "Invalid no. of machine operands for DEXT!");
+  else // Only DEXT and DINS are possible
+    assert(InstIn.getNumOperands() == 5 &&
+           "Invalid no. of machine operands for DINS!");
+
+  assert(InstIn.getOperand(2).isImm());
+  int64_t pos = InstIn.getOperand(2).getImm();
+  assert(InstIn.getOperand(3).isImm());
+  int64_t size = InstIn.getOperand(3).getImm();
+
+  if (size <= 32) {
+    if (pos < 32)  // DEXT/DINS, do nothing
+      return;
+    // DEXTU/DINSU
+    InstIn.getOperand(2).setImm(pos - 32);
+    InstIn.setOpcode((Opcode == Mips::DEXT) ? Mips::DEXTU : Mips::DINSU);
+    return;
+  }
+  // DEXTM/DINSM
+  assert(pos < 32 && "DEXT/DINS cannot have both size and pos > 32");
+  InstIn.getOperand(3).setImm(size - 32);
+  InstIn.setOpcode((Opcode == Mips::DEXT) ? Mips::DEXTM : Mips::DINSM);
+  return;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsDirectObjLower.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsDirectObjLower.h
new file mode 100644
index 0000000..8813cc9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsDirectObjLower.h
@@ -0,0 +1,28 @@
+//===-- MipsDirectObjLower.h - Mips LLVM direct object lowering *- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//

+#ifndef MIPSDIRECTOBJLOWER_H
+#define MIPSDIRECTOBJLOWER_H
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+  class MCInst;
+  class MCStreamer;
+
+  namespace Mips {
+  /// MipsDirectObjLower - This namespace is used to lower MCInsts in cases
+  //                       where the assembler usually finishes the lowering,
+  //                       such as large shifts.
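+  //                       For example, "dsll $2, $3, 35" cannot be
+  //                       encoded directly (the sa field holds 0-31),
+  //                       so it is emitted as DSLL32 with a shift
+  //                       amount of 3.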
+ void LowerLargeShift(MCInst &Inst); + void LowerDextDins(MCInst &Inst); + } +} + +#endif diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 8e84b3f..5d240fe 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -34,7 +34,8 @@ namespace { class MipsELFObjectWriter : public MCELFObjectTargetWriter { public: - MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64); + MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, + bool _isN64, bool IsLittleEndian); virtual ~MipsELFObjectWriter(); @@ -53,9 +54,9 @@ namespace { } MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, - bool _isN64) + bool _isN64, bool IsLittleEndian) : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS, - /*HasRelocationAddend*/ false, + /*HasRelocationAddend*/ (_isN64) ? true : false, /*IsN64*/ _isN64) {} MipsELFObjectWriter::~MipsELFObjectWriter() {} @@ -274,6 +275,7 @@ MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, bool IsLittleEndian, bool Is64Bit) { MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI, - (Is64Bit) ? true : false); + (Is64Bit) ? true : false, + IsLittleEndian); return createELFObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 8dab62d..7fbdae0 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -13,6 +13,7 @@ // #define DEBUG_TYPE "mccodeemitter" #include "MCTargetDesc/MipsBaseInfo.h" +#include "MCTargetDesc/MipsDirectObjLower.h" #include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "llvm/ADT/APFloat.h" @@ -29,17 +30,14 @@ using namespace llvm; namespace { class MipsMCCodeEmitter : public MCCodeEmitter { - MipsMCCodeEmitter(const MipsMCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const MipsMCCodeEmitter &); // DO NOT IMPLEMENT + MipsMCCodeEmitter(const MipsMCCodeEmitter &) LLVM_DELETED_FUNCTION; + void operator=(const MipsMCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; - const MCSubtargetInfo &STI; - MCContext &Ctx; bool IsLittleEndian; public: - MipsMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, - MCContext &ctx, bool IsLittle) : - MCII(mcii), STI(sti) , Ctx(ctx), IsLittleEndian(IsLittle) {} + MipsMCCodeEmitter(const MCInstrInfo &mcii, bool IsLittle) : + MCII(mcii), IsLittleEndian(IsLittle) {} ~MipsMCCodeEmitter() {} @@ -95,7 +93,7 @@ MCCodeEmitter *llvm::createMipsMCCodeEmitterEB(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx) { - return new MipsMCCodeEmitter(MCII, STI, Ctx, false); + return new MipsMCCodeEmitter(MCII, false); } MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII, @@ -103,7 +101,7 @@ MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx) { - return new MipsMCCodeEmitter(MCII, STI, Ctx, true); + return new MipsMCCodeEmitter(MCII, true); } /// EncodeInstruction - Emit the instruction. 
@@ -112,16 +110,35 @@ void MipsMCCodeEmitter::
 EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                   SmallVectorImpl<MCFixup> &Fixups) const
 {
-  uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);
+
+  // Non-pseudo instructions that get changed for direct object
+  // only based on operand values.
+  // If this list of instructions gets much longer, we will move
+  // the check to a function call. Until then, this is more efficient.
+  MCInst TmpInst = MI;
+  switch (MI.getOpcode()) {
+  // If the shift amount is >= 32, the instruction needs to be lowered further.
+  case Mips::DSLL:
+  case Mips::DSRL:
+  case Mips::DSRA:
+    Mips::LowerLargeShift(TmpInst);
+    break;
+  // Double extract instruction is chosen by pos and size operands
+  case Mips::DEXT:
+  case Mips::DINS:
+    Mips::LowerDextDins(TmpInst);
+  }
+
+  uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups);

   // Check for unimplemented opcodes.
-  // Unfortunately in MIPS both NOT and SLL will come in with Binary == 0
+  // Unfortunately in MIPS both NOP and SLL will come in with Binary == 0
   // so we have to special check for them.
-  unsigned Opcode = MI.getOpcode();
+  unsigned Opcode = TmpInst.getOpcode();
   if ((Opcode != Mips::NOP) && (Opcode != Mips::SLL) && !Binary)
     llvm_unreachable("unimplemented opcode in EncodeInstruction()");

-  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+  const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode());
   uint64_t TSFlags = Desc.TSFlags;

   // Pseudo instructions don't get encoded and shouldn't be here
@@ -129,8 +146,10 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
   if ((TSFlags & MipsII::FormMask) == MipsII::Pseudo)
     llvm_unreachable("Pseudo opcode found in EncodeInstruction()");

-  // For now all instructions are 4 bytes
-  int Size = 4; // FIXME: Have Desc.getSize() return the correct value!
+  // Get byte count of instruction
+  unsigned Size = Desc.getSize();
+  if (!Size)
+    llvm_unreachable("Desc.getSize() returns 0");

   EmitInstruction(Binary, Size, OS);
 }
@@ -143,7 +162,11 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
                        SmallVectorImpl<MCFixup> &Fixups) const {

   const MCOperand &MO = MI.getOperand(OpNo);
-  assert(MO.isExpr() && "getBranchTargetOpValue expects only expressions");
+
+  // If the destination is an immediate, we have nothing to do.
+  if (MO.isImm()) return MO.getImm();
+  assert(MO.isExpr() &&
+         "getBranchTargetOpValue expects only expressions or immediates");

   const MCExpr *Expr = MO.getExpr();
   Fixups.push_back(MCFixup::Create(0, Expr,
@@ -159,7 +182,10 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
                      SmallVectorImpl<MCFixup> &Fixups) const {

   const MCOperand &MO = MI.getOperand(OpNo);
-  assert(MO.isExpr() && "getJumpTargetOpValue expects only expressions");
+  // If the destination is an immediate, we have nothing to do.
+ if (MO.isImm()) return MO.getImm(); + assert(MO.isExpr() && + "getJumpTargetOpValue expects only expressions or an immediate"); const MCExpr *Expr = MO.getExpr(); Fixups.push_back(MCFixup::Create(0, Expr, diff --git a/contrib/llvm/lib/Target/Mips/Mips.td b/contrib/llvm/lib/Target/Mips/Mips.td index 90f7942..90c01d5 100644 --- a/contrib/llvm/lib/Target/Mips/Mips.td +++ b/contrib/llvm/lib/Target/Mips/Mips.td @@ -77,6 +77,10 @@ def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion", def FeatureMips16 : SubtargetFeature<"mips16", "InMips16Mode", "true", "Mips16 mode">; +def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", "Mips DSP ASE">; +def FeatureDSPR2 : SubtargetFeature<"dspr2", "HasDSPR2", "true", + "Mips DSP-R2 ASE", [FeatureDSP]>; + //===----------------------------------------------------------------------===// // Mips processors supported. //===----------------------------------------------------------------------===// @@ -95,9 +99,20 @@ def MipsAsmWriter : AsmWriter { bit isMCAsmWriter = 1; } +def MipsAsmParser : AsmParser { + let ShouldEmitMatchRegisterName = 0; +} + +def MipsAsmParserVariant : AsmParserVariant { + int Variant = 0; + + // Recognize hard coded registers. + string RegisterPrefix = "$"; +} + def Mips : Target { let InstructionSet = MipsInstrInfo; - + let AssemblyParsers = [MipsAsmParser]; let AssemblyWriters = [MipsAsmWriter]; + let AssemblyParserVariants = [MipsAsmParserVariant]; } - diff --git a/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp index 030042f..4e6b21f 100644 --- a/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp @@ -20,7 +20,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" @@ -41,6 +41,11 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const { // Adjust stack. if (isInt<16>(-StackSize)) BuildMI(MBB, MBBI, dl, TII.get(Mips::SaveRaF16)).addImm(StackSize); + + if (hasFP(MF)) + BuildMI(MBB, MBBI, dl, TII.get(Mips::MoveR3216), Mips::S0) + .addReg(Mips::SP); + } void Mips16FrameLowering::emitEpilogue(MachineFunction &MF, @@ -55,6 +60,10 @@ void Mips16FrameLowering::emitEpilogue(MachineFunction &MF, if (!StackSize) return; + if (hasFP(MF)) + BuildMI(MBB, MBBI, dl, TII.get(Mips::Move32R16), Mips::SP) + .addReg(Mips::S0); + // Adjust stack. if (isInt<16>(StackSize)) // assumes stacksize multiple of 8 @@ -66,19 +75,58 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const { - // FIXME: implement. + MachineFunction *MF = MBB.getParent(); + MachineBasicBlock *EntryBlock = MF->begin(); + + // + // Registers RA, S0,S1 are the callee saved registers and they + // will be saved with the "save" instruction + // during emitPrologue + // + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + // Add the callee-saved register as live-in. Do not add if the register is + // RA and return address is taken, because it has already been added in + // method MipsTargetLowering::LowerRETURNADDR. + // It's killed at the spill, unless the register is RA and return address + // is taken. 
+    unsigned Reg = CSI[i].getReg();
+    bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA)
+      && MF->getFrameInfo()->isReturnAddressTaken();
+    if (!IsRAAndRetAddrIsTaken)
+      EntryBlock->addLiveIn(Reg);
+  }
+
+  return true;
+}
+
+bool Mips16FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MI,
+                                       const std::vector<CalleeSavedInfo> &CSI,
+                                       const TargetRegisterInfo *TRI) const {
+  //
+  // Registers RA, S0 and S1 are the callee-saved registers and they will be
+  // restored with the restore instruction during emitEpilogue.
+  // We need to override this virtual function, otherwise llvm will try to
+  // restore the registers on its own from the stack.
+  //
+  return true;
+}

 bool Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
-  // FIXME: implement.
-  return true;
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  // Reserve call frame if the size of the maximum call frame fits into 15-bit
+  // immediate field and there are no variable sized objects on the stack.
+  return isInt<15>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects();
 }

 void Mips16FrameLowering::
 processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                      RegScavenger *RS) const {
+  MF.getRegInfo().setPhysRegUsed(Mips::RA);
+  MF.getRegInfo().setPhysRegUsed(Mips::S0);
+  MF.getRegInfo().setPhysRegUsed(Mips::S1);
 }

 const MipsFrameLowering *
diff --git a/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.h b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.h
index 25cc37b..01db71e 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.h
+++ b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.h
@@ -32,6 +32,11 @@ public:
                            const std::vector<CalleeSavedInfo> &CSI,
                            const TargetRegisterInfo *TRI) const;

+  bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   const std::vector<CalleeSavedInfo> &CSI,
+                                   const TargetRegisterInfo *TRI) const;
+
   bool hasReservedCallFrame(const MachineFunction &MF) const;

   void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
index 2bc286b..619646b 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -25,7 +25,7 @@ using namespace llvm;

 Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm)
-  : MipsInstrInfo(tm, /* FIXME: set mips16 unconditional br */ 0),
+  : MipsInstrInfo(tm, Mips::BimmX16),
     RI(*tm.getSubtargetImpl(), *this) {}

 const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const {
@@ -58,12 +58,22 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
-  unsigned Opc = 0, ZeroReg = 0;
+  unsigned Opc = 0;
+
+  if (Mips::CPU16RegsRegClass.contains(DestReg) &&
+      Mips::CPURegsRegClass.contains(SrcReg))
+    Opc = Mips::MoveR3216;
+  else if (Mips::CPURegsRegClass.contains(DestReg) &&
+           Mips::CPU16RegsRegClass.contains(SrcReg))
+    Opc = Mips::Move32R16;
+  else if ((SrcReg == Mips::HI) &&
+           (Mips::CPU16RegsRegClass.contains(DestReg)))
+    Opc = Mips::Mfhi16, SrcReg = 0;
+
+  else if ((SrcReg == Mips::LO) &&
+           (Mips::CPU16RegsRegClass.contains(DestReg)))
+    Opc = Mips::Mflo16, SrcReg = 0;

-  if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
- if (Mips::CPURegsRegClass.contains(SrcReg)) - Opc = Mips::Mov32R16; - } assert(Opc && "Cannot copy registers"); @@ -72,9 +82,6 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (DestReg) MIB.addReg(DestReg, RegState::Define); - if (ZeroReg) - MIB.addReg(ZeroReg); - if (SrcReg) MIB.addReg(SrcReg, getKillRegState(KillSrc)); } @@ -84,7 +91,15 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - assert(false && "Implement this function."); + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); + unsigned Opc = 0; + if (Mips::CPU16RegsRegClass.hasSubClassEq(RC)) + Opc = Mips::SwRxSpImmX16; + assert(Opc && "Register class not handled!"); + BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } void Mips16InstrInfo:: @@ -92,7 +107,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - assert(false && "Implement this function."); + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); + unsigned Opc = 0; + + if (Mips::CPU16RegsRegClass.hasSubClassEq(RC)) + Opc = Mips::LwRxSpImmX16; + assert(Opc && "Register class not handled!"); + BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); } bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { @@ -102,7 +126,7 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { default: return false; case Mips::RetRA16: - ExpandRetRA16(MBB, MI, Mips::JrRa16); + ExpandRetRA16(MBB, MI, Mips::JrcRa16); break; } @@ -113,12 +137,55 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { /// GetOppositeBranchOpc - Return the inverse of the specified /// opcode, e.g. turning BEQ to BNE. unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const { + switch (Opc) { + default: llvm_unreachable("Illegal opcode!"); + case Mips::BeqzRxImmX16: return Mips::BnezRxImmX16; + case Mips::BnezRxImmX16: return Mips::BeqzRxImmX16; + case Mips::BteqzT8CmpX16: return Mips::BtnezT8CmpX16; + case Mips::BteqzT8SltX16: return Mips::BtnezT8SltX16; + case Mips::BteqzT8SltiX16: return Mips::BtnezT8SltiX16; + case Mips::BtnezX16: return Mips::BteqzX16; + case Mips::BtnezT8CmpiX16: return Mips::BteqzT8CmpiX16; + case Mips::BtnezT8SltuX16: return Mips::BteqzT8SltuX16; + case Mips::BtnezT8SltiuX16: return Mips::BteqzT8SltiuX16; + case Mips::BteqzX16: return Mips::BtnezX16; + case Mips::BteqzT8CmpiX16: return Mips::BtnezT8CmpiX16; + case Mips::BteqzT8SltuX16: return Mips::BtnezT8SltuX16; + case Mips::BteqzT8SltiuX16: return Mips::BtnezT8SltiuX16; + case Mips::BtnezT8CmpX16: return Mips::BteqzT8CmpX16; + case Mips::BtnezT8SltX16: return Mips::BteqzT8SltX16; + case Mips::BtnezT8SltiX16: return Mips::BteqzT8SltiX16; + } assert(false && "Implement this function."); return 0; } +/// Adjust SP by Amount bytes. +void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + DebugLoc DL = I != MBB.end() ? 
I->getDebugLoc() : DebugLoc(); + if (isInt<16>(Amount)) { + if (Amount < 0) + BuildMI(MBB, I, DL, get(Mips::SaveDecSpF16)). addImm(-Amount); + else if (Amount > 0) + BuildMI(MBB, I, DL, get(Mips::RestoreIncSpF16)).addImm(Amount); + } + else + // not implemented for large values yet + assert(false && "adjust stack pointer amount exceeded"); +} + unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { - return 0; + return (Opc == Mips::BeqzRxImmX16 || Opc == Mips::BimmX16 || + Opc == Mips::BnezRxImmX16 || Opc == Mips::BteqzX16 || + Opc == Mips::BteqzT8CmpX16 || Opc == Mips::BteqzT8CmpiX16 || + Opc == Mips::BteqzT8SltX16 || Opc == Mips::BteqzT8SltuX16 || + Opc == Mips::BteqzT8SltiX16 || Opc == Mips::BteqzT8SltiuX16 || + Opc == Mips::BtnezX16 || Opc == Mips::BtnezT8CmpX16 || + Opc == Mips::BtnezT8CmpiX16 || Opc == Mips::BtnezT8SltX16 || + Opc == Mips::BtnezT8SltuX16 || Opc == Mips::BtnezT8SltiX16 || + Opc == Mips::BtnezT8SltiuX16 ) ? Opc : 0; } void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB, diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h index 260c5b6..e06ccfe 100644 --- a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h +++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h @@ -64,6 +64,10 @@ public: virtual unsigned GetOppositeBranchOpc(unsigned Opc) const; + /// Adjust SP by Amount bytes. + void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + private: virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.td index 94cf984..5defc75 100644 --- a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.td @@ -10,21 +10,74 @@ // This file describes Mips16 instructions. 
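The adjustStackPtr change above exploits a MIPS16 quirk: SP is not directly addressable as a MIPS16 GPR, so the patch reuses the SAVE and RESTORE frame instructions, whose immediate field moves SP without needing a scratch register. A minimal standalone C++ sketch of the opcode selection (illustrative names, not the LLVM API; amounts outside the signed 16-bit range stay unimplemented, matching the assert in the patch):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Does V fit in an N-bit signed field? (stand-in for llvm::isInt<N>)
    template <unsigned N> bool fitsSigned(int64_t V) {
      return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
    }

    // Model of the selection in Mips16InstrInfo::adjustStackPtr:
    // negative amounts allocate (SaveDecSpF16), positive amounts
    // deallocate (RestoreIncSpF16); zero emits nothing.
    const char *spAdjustOpcode(int64_t Amount) {
      assert(fitsSigned<16>(Amount) && "large adjustments not implemented");
      if (Amount < 0) return "save $frame_size";    // SP -= -Amount
      if (Amount > 0) return "restore $frame_size"; // SP += Amount
      return "";
    }

    int main() {
      printf("%s\n", spAdjustOpcode(-32)); // prologue-style decrement
      printf("%s\n", spAdjustOpcode(32));  // epilogue-style increment
    }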
// //===----------------------------------------------------------------------===// +// +// +// Mips Address +// +def addr16 : + ComplexPattern<iPTR, 3, "SelectAddr16", [frameindex], [SDNPWantParent]>; // -// RRR-type instruction format +// Address operand +def mem16 : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops CPU16Regs, simm16, CPU16Regs); + let EncoderMethod = "getMemEncoding"; +} + +def mem16_ea : Operand<i32> { + let PrintMethod = "printMemOperandEA"; + let MIOperandInfo = (ops CPU16Regs, simm16); + let EncoderMethod = "getMemEncoding"; +} + +// +// Compare a register and immediate and place result in CC +// Implicit use of T8 // +// EXT-CCRR Instruction format +// +class FEXT_CCRXI16_ins<bits<5> _op, string asmstr, + InstrItinClass itin>: + FEXT_RI16<_op, (outs CPU16Regs:$cc), (ins CPU16Regs:$rx, simm16:$imm), + !strconcat(asmstr, "\t$rx, $imm\n\tmove\t$cc, $$t8"), [], itin> { + let isCodeGenOnly=1; +} -class FRRR16_ins<bits<2> _f, string asmstr, InstrItinClass itin> : - FRRR16<_f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry), - !strconcat(asmstr, "\t$rz, $rx, $ry"), [], itin>; +// +// EXT-I instruction format +// +class FEXT_I16_ins<bits<5> eop, string asmstr, InstrItinClass itin> : + FEXT_I16<eop, (outs), (ins brtarget:$imm16), + !strconcat(asmstr, "\t$imm16"),[], itin>; // -// I8_MOV32R instruction format (used only by MOV32R instruction) +// EXT-I8 instruction format // -class FI8_MOV32R16_ins<string asmstr, InstrItinClass itin>: - FI8_MOV32R16<(outs CPURegs:$r32), (ins CPU16Regs:$rz), - !strconcat(asmstr, "\t$r32, $rz"), [], itin>; + +class FEXT_I816_ins_base<bits<3> _func, string asmstr, + string asmstr2, InstrItinClass itin>: + FEXT_I816<_func, (outs), (ins uimm16:$imm), !strconcat(asmstr, asmstr2), + [], itin>; + +class FEXT_I816_ins<bits<3> _func, string asmstr, + InstrItinClass itin>: + FEXT_I816_ins_base<_func, asmstr, "\t$imm", itin>; + +// +// Assembler formats in alphabetical order. +// Natural and pseudos are mixed together. 
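The FEXT_CCRXI16_ins class just defined hides a two-instruction expansion: the MIPS16 compare forms deposit their result in the special register T8, and the appended move copies T8 into the nominal $cc destination. A self-contained C++ toy model of that contract (the struct and function names are invented for illustration, not LLVM code):

    #include <cstdint>
    #include <cstdio>

    struct Mips16State {
      int32_t t8 = 0; // implicit condition register written by cmp/slt forms
    };

    // "slti rx, imm" : t8 = (rx < imm)
    void slti(Mips16State &S, int32_t rx, int32_t imm) { S.t8 = rx < imm; }

    // "move cc, $t8" : makes the hidden result visible in an ordinary GPR
    void moveFromT8(const Mips16State &S, int32_t &cc) { cc = S.t8; }

    int main() {
      Mips16State S;
      int32_t cc = -1;
      slti(S, 3, 10);    // the compare writes only T8...
      moveFromT8(S, cc); // ...so the pattern appends a move
      printf("3 < 10 -> %d\n", cc); // prints 1
    }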
+//
+// Compare two registers and place result in CC
+// Implicit use of T8
+//
+// CC-RR Instruction format
+//
+class FCCRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+  FRR16<f, (outs CPU16Regs:$cc), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+        !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$cc, $$t8"), [], itin> {
+  let isCodeGenOnly=1;
+}
 
 //
 // EXT-RI instruction format
@@ -42,6 +95,10 @@ class FEXT_RI16_ins<bits<5> _op, string asmstr,
 class FEXT_RI16_PC_ins<bits<5> _op, string asmstr,
                        InstrItinClass itin>:
   FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $$pc, $imm", itin>;
 
+class FEXT_RI16_B_ins<bits<5> _op, string asmstr,
+                      InstrItinClass itin>:
+  FEXT_RI16<_op, (outs), (ins CPU16Regs:$rx, brtarget:$imm),
+            !strconcat(asmstr, "\t$rx, $imm"), [], itin>;
 
 class FEXT_2RI16_ins<bits<5> _op, string asmstr,
                      InstrItinClass itin>:
@@ -51,6 +108,104 @@ class FEXT_2RI16_ins<bits<5> _op, string asmstr,
 }
 
+// This has an explicit sp argument that we ignore to work around a problem
+// in the compiler.
+class FEXT_RI16_SP_explicit_ins<bits<5> _op, string asmstr,
+                                InstrItinClass itin>:
+  FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins CPUSPReg:$ry, simm16:$imm),
+            !strconcat(asmstr, "\t$rx, $imm ( $ry ); "), [], itin>;
+
+//
+// EXT-RRI instruction format
+//
+
+class FEXT_RRI16_mem_ins<bits<5> op, string asmstr, Operand MemOpnd,
+                         InstrItinClass itin>:
+  FEXT_RRI16<op, (outs CPU16Regs:$ry), (ins MemOpnd:$addr),
+             !strconcat(asmstr, "\t$ry, $addr"), [], itin>;
+
+class FEXT_RRI16_mem2_ins<bits<5> op, string asmstr, Operand MemOpnd,
+                          InstrItinClass itin>:
+  FEXT_RRI16<op, (outs ), (ins CPU16Regs:$ry, MemOpnd:$addr),
+             !strconcat(asmstr, "\t$ry, $addr"), [], itin>;
+
+//
+//
+// EXT-RRI-A instruction format
+//
+
+class FEXT_RRI_A16_mem_ins<bits<1> op, string asmstr, Operand MemOpnd,
+                           InstrItinClass itin>:
+  FEXT_RRI_A16<op, (outs CPU16Regs:$ry), (ins MemOpnd:$addr),
+               !strconcat(asmstr, "\t$ry, $addr"), [], itin>;
+
+//
+// EXT-SHIFT instruction format
+//
+class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>:
+  FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, shamt:$sa),
+               !strconcat(asmstr, "\t$rx, $ry, $sa"), [], itin>;
+
+//
+// EXT-T8I8
+//
+class FEXT_T8I816_ins<bits<3> _func, string asmstr, string asmstr2,
+                      InstrItinClass itin>:
+  FEXT_I816<_func, (outs),
+            (ins CPU16Regs:$rx, CPU16Regs:$ry, brtarget:$imm),
+            !strconcat(asmstr2, !strconcat("\t$rx, $ry\n\t",
+                       !strconcat(asmstr, "\t$imm"))),[], itin> {
+  let isCodeGenOnly=1;
+}
+
+//
+// EXT-T8I8I
+//
+class FEXT_T8I8I16_ins<bits<3> _func, string asmstr, string asmstr2,
+                       InstrItinClass itin>:
+  FEXT_I816<_func, (outs),
+            (ins CPU16Regs:$rx, simm16:$imm, brtarget:$targ),
+            !strconcat(asmstr2, !strconcat("\t$rx, $imm\n\t",
+                       !strconcat(asmstr, "\t$targ"))), [], itin> {
+  let isCodeGenOnly=1;
+}
+//
+
+
+//
+// I8_MOVR32 instruction format (used only by the MOVR32 instruction)
+//
+class FI8_MOVR3216_ins<string asmstr, InstrItinClass itin>:
+  FI8_MOVR3216<(outs CPU16Regs:$rz), (ins CPURegs:$r32),
+               !strconcat(asmstr, "\t$rz, $r32"), [], itin>;
+
+//
+// I8_MOV32R instruction format (used only by MOV32R instruction)
+//
+
+class FI8_MOV32R16_ins<string asmstr, InstrItinClass itin>:
+  FI8_MOV32R16<(outs CPURegs:$r32), (ins CPU16Regs:$rz),
+               !strconcat(asmstr, "\t$r32, $rz"), [], itin>;
+
+//
+// These are pseudo formats for multiply.
+// The first one could be changed to a non-pseudo now.
+//
+// MULT
+//
+class FMULT16_ins<string asmstr, InstrItinClass itin> :
+  MipsPseudo16<(outs), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+               !strconcat(asmstr, "\t$rx, $ry"), []>;
+
+//
+// MULT-LO
+//
+class FMULT16_LO_ins<string asmstr, InstrItinClass itin> :
+  MipsPseudo16<(outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+               !strconcat(asmstr, "\t$rx, $ry\n\tmflo\t$rz"), []> {
+  let isCodeGenOnly=1;
+}
+
 //
 // RR-type instruction format
 //
@@ -60,6 +215,27 @@ class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
         !strconcat(asmstr, "\t$rx, $ry"), [], itin> {
 }
 
+class FRRTR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+  FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+        !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$rz, $$t8"), [], itin> ;
+
+//
+// Maybe refactor, but we need a $zero as a dummy first parameter.
+//
+class FRR16_div_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+  FRR16<f, (outs ), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+        !strconcat(asmstr, "\t$$zero, $rx, $ry"), [], itin> ;
+
+class FUnaryRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+  FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry),
+        !strconcat(asmstr, "\t$rx, $ry"), [], itin> ;
+
+
+class FRR16_M_ins<bits<5> f, string asmstr,
+                  InstrItinClass itin> :
+  FRR16<f, (outs CPU16Regs:$rx), (ins),
+        !strconcat(asmstr, "\t$rx"), [], itin>;
+
 class FRxRxRy16_ins<bits<5> f, string asmstr,
                     InstrItinClass itin> :
   FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
@@ -74,35 +250,109 @@ class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_,
   FRR16_JALRC<nd_, l_, 1, (outs), (ins), !strconcat(asmstr, "\t $$ra"),
               [], itin> ;
 
+
+class FRR16_JALRC_ins<bits<1> nd, bits<1> l, bits<1> ra,
+                      string asmstr, InstrItinClass itin>:
+  FRR16_JALRC<nd, l, ra, (outs), (ins CPU16Regs:$rx),
+              !strconcat(asmstr, "\t $rx"), [], itin> ;
+
 //
-// EXT-RRI instruction format
+// RRR-type instruction format
 //
-class FEXT_RRI16_mem_ins<bits<5> op, string asmstr, Operand MemOpnd,
-                         InstrItinClass itin>:
-  FEXT_RRI16<op, (outs CPU16Regs:$ry), (ins MemOpnd:$addr),
-             !strconcat(asmstr, "\t$ry, $addr"), [], itin>;
+class FRRR16_ins<bits<2> _f, string asmstr, InstrItinClass itin> :
+  FRRR16<_f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+         !strconcat(asmstr, "\t$rz, $rx, $ry"), [], itin>;
 
-class FEXT_RRI16_mem2_ins<bits<5> op, string asmstr, Operand MemOpnd,
-                          InstrItinClass itin>:
-  FEXT_RRI16<op, (outs ), (ins CPU16Regs:$ry, MemOpnd:$addr),
-             !strconcat(asmstr, "\t$ry, $addr"), [], itin>;
+//
+// These Sel patterns support the generation of conditional move
+// pseudo instructions.
+//
+// The nomenclature uses the components making up the pseudo and may
+// be a bit counterintuitive when compared with the end result we seek.
+// For example, using a beqz in the example directly below results in the
+// conditional move being done if the tested register is not zero.
+// I considered it easier to check by keeping the pseudo consistent with
+// its components, but it could have been done differently.
+//
+// The simplest case is when we can test an operand directly and do the
+// conditional move based on a simple mips16 conditional
+// branch instruction.
+// for example:
+// if $op == beqz or bnez:
+//
+//   $op1 $rt, .+4
+//   move $rd, $rs
+//
+// if $op == beqz, then if $rt != 0, then the conditional assignment
+// $rd = $rs is done.
+// if $op == bnez, then if $rt == 0, then the conditional assignment
+// $rd = $rs is done.
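The branch-over-move expansion described above is easy to model in plain C++; the function below mirrors what "beqz $rt, .+4; move $rd, $rs" computes (an illustrative sketch, not part of the patch):

    #include <cstdint>
    #include <cstdio>

    // SelBeqZ semantics: the beqz skips the move when rt == 0, so rd keeps
    // its old value for rt == 0 and becomes rs otherwise.
    int32_t selBeqZ(int32_t rd, int32_t rs, int32_t rt) {
      if (rt != 0) // branch not taken -> the move executes
        rd = rs;
      return rd;
    }

    int main() {
      printf("%d\n", selBeqZ(7, 42, 0)); // rt == 0: rd unchanged -> 7
      printf("%d\n", selBeqZ(7, 42, 1)); // rt != 0: rd = rs      -> 42
    }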
//
-// EXT-SHIFT instruction format
+// So this pseudo class only has one operand, i.e. op
 //
-class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>:
-  FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, shamt:$sa),
-               !strconcat(asmstr, "\t$rx, $ry, $sa"), [], itin>;
+class Sel<bits<5> f1, string op, InstrItinClass itin>:
+  MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
+                CPU16Regs:$rt),
+                !strconcat(op, "\t$rt, .+4\n\t\n\tmove $rd, $rs"), [], itin,
+                Pseudo16> {
+  let isCodeGenOnly=1;
+  let Constraints = "$rd = $rd_";
+}
 
 //
-// Address operand
-def mem16 : Operand<i32> {
-  let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops CPU16Regs, simm16);
-  let EncoderMethod = "getMemEncoding";
+// The next two instruction classes allow for an operation which tests
+// two operands and returns a value in register T8 and
+// then does a conditional branch based on the value of T8
+//
+
+// op2 can be cmpi or slti/sltiu
+// op1 can be bteqz or btnez
+// the operands for op2 are a register and a signed constant
+//
+// $op2 $t, $imm  ;test register t and branch conditionally
+// $op1 .+4       ;op1 is a conditional branch
+// move $rd, $rs
+//
+//
+class SeliT<bits<5> f1, string op1, bits<5> f2, string op2,
+            InstrItinClass itin>:
+  MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
+                CPU16Regs:$rl, simm16:$imm),
+                !strconcat(op2,
+                !strconcat("\t$rl, $imm\n\t",
+                !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), [], itin,
+                Pseudo16> {
+  let isCodeGenOnly=1;
+  let Constraints = "$rd = $rd_";
+}
+
+//
+// op2 can be cmp or slt/sltu
+// op1 can be bteqz or btnez
+// the operands for op2 are two registers
+// op1 is a conditional branch
+//
+//
+// $op2 $rl, $rr  ;test registers rl,rr
+// $op1 .+4       ;op1 is a conditional branch
+// move $rd, $rs
+//
+//
+class SelT<bits<5> f1, string op1, bits<5> f2, string op2,
+           InstrItinClass itin>:
+  MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
+                CPU16Regs:$rl, CPU16Regs:$rr),
+                !strconcat(op2,
+                !strconcat("\t$rl, $rr\n\t",
+                !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), [], itin,
+                Pseudo16> {
+  let isCodeGenOnly=1;
+  let Constraints = "$rd = $rd_";
 }
 
+
 //
 // Some general instruction class info
 //
@@ -115,6 +365,24 @@ class ArithLogic16Defs<bit isCom=0> {
   bit neverHasSideEffects = 1;
 }
 
+class branch16 {
+  bit isBranch = 1;
+  bit isTerminator = 1;
+  bit isBarrier = 1;
+}
+
+class cbranch16 {
+  bit isBranch = 1;
+  bit isTerminator = 1;
+}
+
+class MayLoad {
+  bit mayLoad = 1;
+}
+
+class MayStore {
+  bit mayStore = 1;
+}
 
 //
 // Format: ADDIU rx, immediate                          MIPS16e
@@ -126,6 +394,9 @@ def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIAlu>;
 def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>,
   ArithLogic16Defs<0>;
 
+def AddiuRxRyOffMemX16:
+  FEXT_RRI_A16_mem_ins<0, "addiu", mem16_ea, IIAlu>;
+
 //
 // Format: ADDIU rx, pc, immediate                      MIPS16e
@@ -148,6 +419,87 @@ def AdduRxRyRz16: FRRR16_ins<01, "addu", IIAlu>, ArithLogic16Defs<1>;
 
 def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
 
+
+//
+// Format: BEQZ rx, offset                              MIPS16e
+// Purpose: Branch on Equal to Zero (Extended)
+// To test a GPR then do a PC-relative conditional branch.
+//
+def BeqzRxImmX16: FEXT_RI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
+
+// Format: B offset                                     MIPS16e
+// Purpose: Unconditional Branch
+// To do an unconditional PC-relative branch.
+// +def BimmX16: FEXT_I16_ins<0b00010, "b", IIAlu>, branch16; + +// +// Format: BNEZ rx, offset MIPS16e +// Purpose: Branch on Not Equal to Zero (Extended) +// To test a GPR then do a PC-relative conditional branch. +// +def BnezRxImmX16: FEXT_RI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16; + +// +// Format: BTEQZ offset MIPS16e +// Purpose: Branch on T Equal to Zero (Extended) +// To test special register T then do a PC-relative conditional branch. +// +def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16; + +def BteqzT8CmpX16: FEXT_T8I816_ins<0b000, "bteqz", "cmp", IIAlu>, cbranch16; + +def BteqzT8CmpiX16: FEXT_T8I8I16_ins<0b000, "bteqz", "cmpi", IIAlu>, + cbranch16; + +def BteqzT8SltX16: FEXT_T8I816_ins<0b000, "bteqz", "slt", IIAlu>, cbranch16; + +def BteqzT8SltuX16: FEXT_T8I816_ins<0b000, "bteqz", "sltu", IIAlu>, cbranch16; + +def BteqzT8SltiX16: FEXT_T8I8I16_ins<0b000, "bteqz", "slti", IIAlu>, cbranch16; + +def BteqzT8SltiuX16: FEXT_T8I8I16_ins<0b000, "bteqz", "sltiu", IIAlu>, + cbranch16; + +// +// Format: BTNEZ offset MIPS16e +// Purpose: Branch on T Not Equal to Zero (Extended) +// To test special register T then do a PC-relative conditional branch. +// +def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu> ,cbranch16; + +def BtnezT8CmpX16: FEXT_T8I816_ins<0b000, "btnez", "cmp", IIAlu>, cbranch16; + +def BtnezT8CmpiX16: FEXT_T8I8I16_ins<0b000, "btnez", "cmpi", IIAlu>, cbranch16; + +def BtnezT8SltX16: FEXT_T8I816_ins<0b000, "btnez", "slt", IIAlu>, cbranch16; + +def BtnezT8SltuX16: FEXT_T8I816_ins<0b000, "btnez", "sltu", IIAlu>, cbranch16; + +def BtnezT8SltiX16: FEXT_T8I8I16_ins<0b000, "btnez", "slti", IIAlu>, cbranch16; + +def BtnezT8SltiuX16: FEXT_T8I8I16_ins<0b000, "btnez", "sltiu", IIAlu>, + cbranch16; + +// +// Format: DIV rx, ry MIPS16e +// Purpose: Divide Word +// To divide 32-bit signed integers. +// +def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> { + let Defs = [HI, LO]; +} + +// +// Format: DIVU rx, ry MIPS16e +// Purpose: Divide Unsigned Word +// To divide 32-bit unsigned integers. +// +def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> { + let Defs = [HI, LO]; +} + + // // Format: JR ra MIPS16e // Purpose: Jump Register Through Register ra @@ -155,35 +507,56 @@ def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>; // address register. // -def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu>; +def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu> { + let isBranch = 1; + let isIndirectBranch = 1; + let hasDelaySlot = 1; + let isTerminator=1; + let isBarrier=1; +} + +def JrcRa16: FRR16_JALRC_RA_only_ins<0, 0, "jrc", IIAlu> { + let isBranch = 1; + let isIndirectBranch = 1; + let isTerminator=1; + let isBarrier=1; +} +def JrcRx16: FRR16_JALRC_ins<1, 1, 0, "jrc", IIAlu> { + let isBranch = 1; + let isIndirectBranch = 1; + let isTerminator=1; + let isBarrier=1; +} // // Format: LB ry, offset(rx) MIPS16e // Purpose: Load Byte (Extended) // To load a byte from memory as a signed value. // -def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IIAlu>; +def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad; // // Format: LBU ry, offset(rx) MIPS16e // Purpose: Load Byte Unsigned (Extended) // To load a byte from memory as a unsigned value. 
// -def LbuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IIAlu>; +def LbuRxRyOffMemX16: + FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IILoad>, MayLoad; // // Format: LH ry, offset(rx) MIPS16e // Purpose: Load Halfword signed (Extended) // To load a halfword from memory as a signed value. // -def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IIAlu>; +def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad; // // Format: LHU ry, offset(rx) MIPS16e // Purpose: Load Halfword unsigned (Extended) // To load a halfword from memory as an unsigned value. // -def LhuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IIAlu>; +def LhuRxRyOffMemX16: + FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IILoad>, MayLoad; // // Format: LI rx, immediate MIPS16e @@ -197,28 +570,98 @@ def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>; // Purpose: Load Word (Extended) // To load a word from memory as a signed value. // -def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IIAlu>; +def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad; + +// Format: LW rx, offset(sp) MIPS16e +// Purpose: Load Word (SP-Relative, Extended) +// To load an SP-relative word from memory as a signed value. +// +def LwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b10110, "lw", IILoad>, MayLoad; // // Format: MOVE r32, rz MIPS16e // Purpose: Move // To move the contents of a GPR to a GPR. // -def Mov32R16: FI8_MOV32R16_ins<"move", IIAlu>; +def Move32R16: FI8_MOV32R16_ins<"move", IIAlu>; + +// +// Format: MOVE ry, r32 MIPS16e +//Purpose: Move +// To move the contents of a GPR to a GPR. +// +def MoveR3216: FI8_MOVR3216_ins<"move", IIAlu>; + +// +// Format: MFHI rx MIPS16e +// Purpose: Move From HI Register +// To copy the special purpose HI register to a GPR. +// +def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIAlu> { + let Uses = [HI]; + let neverHasSideEffects = 1; +} + +// +// Format: MFLO rx MIPS16e +// Purpose: Move From LO Register +// To copy the special purpose LO register to a GPR. +// +def Mflo16: FRR16_M_ins<0b10010, "mflo", IIAlu> { + let Uses = [LO]; + let neverHasSideEffects = 1; +} + +// +// Pseudo Instruction for mult +// +def MultRxRy16: FMULT16_ins<"mult", IIAlu> { + let isCommutable = 1; + let neverHasSideEffects = 1; + let Defs = [HI, LO]; +} + +def MultuRxRy16: FMULT16_ins<"multu", IIAlu> { + let isCommutable = 1; + let neverHasSideEffects = 1; + let Defs = [HI, LO]; +} + +// +// Format: MULT rx, ry MIPS16e +// Purpose: Multiply Word +// To multiply 32-bit signed integers. +// +def MultRxRyRz16: FMULT16_LO_ins<"mult", IIAlu> { + let isCommutable = 1; + let neverHasSideEffects = 1; + let Defs = [HI, LO]; +} + +// +// Format: MULTU rx, ry MIPS16e +// Purpose: Multiply Unsigned Word +// To multiply 32-bit unsigned integers. +// +def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIAlu> { + let isCommutable = 1; + let neverHasSideEffects = 1; + let Defs = [HI, LO]; +} // // Format: NEG rx, ry MIPS16e // Purpose: Negate // To negate an integer value. 
//
-def NegRxRy16: FRR16_ins<0b11101, "neg", IIAlu>;
+def NegRxRy16: FUnaryRR16_ins<0b11101, "neg", IIAlu>;
 
 //
 // Format: NOT rx, ry                                   MIPS16e
 // Purpose: Not
 // To complement an integer value
 //
-def NotRxRy16: FRR16_ins<0b01111, "not", IIAlu>;
+def NotRxRy16: FUnaryRR16_ins<0b01111, "not", IIAlu>;
 
 //
 // Format: OR rx, ry                                    MIPS16e
@@ -240,10 +683,22 @@ def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIAlu>, ArithLogic16Defs<1>;
 // for direct object emitter, encoding needs to be adjusted for the
 // frame size
 //
-let ra=1, s=0,s0=0,s1=0 in
+let ra=1, s=0,s0=1,s1=1 in
 def RestoreRaF16:
   FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
-             "restore \t$$ra, $frame_size", [], IILoad >;
+             "restore\t$$ra, $$s0, $$s1, $frame_size", [], IILoad >, MayLoad {
+  let isCodeGenOnly = 1;
+}
+
+// Use Restore to increment SP, since SP is not a Mips16 register; this
+// is an easy way to do it that does not require a register.
+//
+let ra=0, s=0,s0=0,s1=0 in
+def RestoreIncSpF16:
+  FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
+             "restore\t$frame_size", [], IILoad >, MayLoad {
+  let isCodeGenOnly = 1;
+}
 
 //
 // Format: SAVE {ra,}{s0/s1/s0-1,}{framesize} (All arguments are optional)
@@ -252,24 +707,152 @@ def RestoreRaF16:
 // To set up a stack frame on entry to a subroutine,
 // saving return address and static registers, and adjusting stack
 //
-let ra=1, s=1,s0=0,s1=0 in
+let ra=1, s=1,s0=1,s1=1 in
 def SaveRaF16:
   FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
-             "save \t$$ra, $frame_size", [], IILoad >;
+             "save\t$$ra, $$s0, $$s1, $frame_size", [], IIStore >, MayStore {
+  let isCodeGenOnly = 1;
+}
 
+//
+// Use Save to decrement the SP by a constant, since SP is not
+// a Mips16 register.
+//
+let ra=0, s=0,s0=0,s1=0 in
+def SaveDecSpF16:
+  FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
+             "save\t$frame_size", [], IIStore >, MayStore {
+  let isCodeGenOnly = 1;
+}
+//
 // Format: SB ry, offset(rx)                            MIPS16e
 // Purpose: Store Byte (Extended)
 // To store a byte to memory.
 //
-def SbRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIAlu>;
+def SbRxRyOffMemX16:
+  FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIStore>, MayStore;
 
 //
+// The Sel(T) instructions are pseudos
+// T means that they use T8 implicitly.
+//
+//
+// Format: SelBeqZ rd, rs, rt
+// Purpose: if rt==0, do nothing
+//          else rd = rs
+//
+def SelBeqZ: Sel<0b00100, "beqz", IIAlu>;
+
+//
+// Format: SelTBteqZCmp rd, rs, rl, rr
+// Purpose: b = Cmp rl, rr.
+//          If b==0 then do nothing.
+//          if b!=0 then rd = rs
+//
+def SelTBteqZCmp: SelT<0b000, "bteqz", 0b01010, "cmp", IIAlu>;
+
+//
+// Format: SelTBteqZCmpi rd, rs, rl, rr
+// Purpose: b = Cmpi rl, imm.
+//          If b==0 then do nothing.
+//          if b!=0 then rd = rs
+//
+def SelTBteqZCmpi: SeliT<0b000, "bteqz", 0b01110, "cmpi", IIAlu>;
+
+//
+// Format: SelTBteqZSlt rd, rs, rl, rr
+// Purpose: b = Slt rl, rr.
+//          If b==0 then do nothing.
+//          if b!=0 then rd = rs
+//
+def SelTBteqZSlt: SelT<0b000, "bteqz", 0b00010, "slt", IIAlu>;
+
+//
+// Format: SelTBteqZSlti rd, rs, rl, rr
+// Purpose: b = Slti rl, imm.
+//          If b==0 then do nothing.
+//          if b!=0 then rd = rs
+//
+def SelTBteqZSlti: SeliT<0b000, "bteqz", 0b01010, "slti", IIAlu>;
+
+//
+// Format: SelTBteqZSltu rd, rs, rl, rr
+// Purpose: b = Sltu rl, rr.
+//          If b==0 then do nothing.
+//          if b!=0 then rd = rs
+//
+def SelTBteqZSltu: SelT<0b000, "bteqz", 0b00011, "sltu", IIAlu>;
+
+//
+// Format: SelTBteqZSltiu rd, rs, rl, rr
+// Purpose: b = Sltiu rl, imm.
+//          If b==0 then do nothing.
+//          if b!=0 then rd = rs
+//
+def SelTBteqZSltiu: SeliT<0b000, "bteqz", 0b01011, "sltiu", IIAlu>;
+
+//
+// Format: SelBnez rd, rs, rt
+// Purpose: if rt!=0, do nothing
+//          else rd = rs
+//
+def SelBneZ: Sel<0b00101, "bnez", IIAlu>;
+
+//
+// Format: SelTBtneZCmp rd, rs, rl, rr
+// Purpose: b = Cmp rl, rr.
+//          If b!=0 then do nothing.
+//          if b==0 then rd = rs
+//
+def SelTBtneZCmp: SelT<0b001, "btnez", 0b01010, "cmp", IIAlu>;
+
+//
+// Format: SelTBtneZCmpi rd, rs, rl, rr
+// Purpose: b = Cmpi rl, imm.
+//          If b!=0 then do nothing.
+//          if b==0 then rd = rs
+//
+def SelTBtneZCmpi: SeliT<0b000, "btnez", 0b01110, "cmpi", IIAlu>;
+
+//
+// Format: SelTBtneZSlt rd, rs, rl, rr
+// Purpose: b = Slt rl, rr.
+//          If b!=0 then do nothing.
+//          if b==0 then rd = rs
+//
+def SelTBtneZSlt: SelT<0b001, "btnez", 0b00010, "slt", IIAlu>;
+
+//
+// Format: SelTBtneZSlti rd, rs, rl, rr
+// Purpose: b = Slti rl, imm.
+//          If b!=0 then do nothing.
+//          if b==0 then rd = rs
+//
+def SelTBtneZSlti: SeliT<0b001, "btnez", 0b01010, "slti", IIAlu>;
+
+//
+// Format: SelTBtneZSltu rd, rs, rl, rr
+// Purpose: b = Sltu rl, rr.
+//          If b!=0 then do nothing.
+//          if b==0 then rd = rs
+//
+def SelTBtneZSltu: SelT<0b001, "btnez", 0b00011, "sltu", IIAlu>;
+
+//
+// Format: SelTBtneZSltiu rd, rs, rl, rr
+// Purpose: b = Sltiu rl, imm.
+//          If b!=0 then do nothing.
+//          if b==0 then rd = rs
+//
+def SelTBtneZSltiu: SeliT<0b001, "btnez", 0b01011, "sltiu", IIAlu>;
+//
+//
 // Format: SH ry, offset(rx)                            MIPS16e
 // Purpose: Store Halfword (Extended)
 // To store a halfword to memory.
 //
-def ShRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11001, "sh", mem16, IIAlu>;
+def ShRxRyOffMemX16:
+  FEXT_RRI16_mem2_ins<0b11001, "sh", mem16, IIStore>, MayStore;
 
 //
 // Format: SLL rx, ry, sa                                MIPS16e
@@ -285,8 +868,40 @@ def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>;
 //
 def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>;
 
+//
+// Format: SLTI rx, immediate                            MIPS16e
+// Purpose: Set on Less Than Immediate (Extended)
+// To record the result of a less-than comparison with a constant.
+//
+def SltiCCRxImmX16: FEXT_CCRXI16_ins<0b01010, "slti", IIAlu>;
 
 //
+// Format: SLTIU rx, immediate                           MIPS16e
+// Purpose: Set on Less Than Immediate Unsigned (Extended)
+// To record the result of a less-than comparison with a constant.
+//
+def SltiuCCRxImmX16: FEXT_CCRXI16_ins<0b01011, "sltiu", IIAlu>;
+
+//
+// Format: SLT rx, ry                                    MIPS16e
+// Purpose: Set on Less Than
+// To record the result of a less-than comparison.
+//
+def SltRxRy16: FRR16_ins<0b00010, "slt", IIAlu>;
+
+def SltCCRxRy16: FCCRR16_ins<0b00010, "slt", IIAlu>;
+
+// Format: SLTU rx, ry                                   MIPS16e
+// Purpose: Set on Less Than Unsigned
+// To record the result of an unsigned less-than comparison.
+//
+def SltuRxRyRz16: FRRTR16_ins<0b00011, "sltu", IIAlu> {
+  let isCodeGenOnly=1;
+}
+
+
+def SltuCCRxRy16: FCCRR16_ins<0b00011, "sltu", IIAlu>;
+//
 // Format: SRAV ry, rx                                   MIPS16e
 // Purpose: Shift Word Right Arithmetic Variable
 // To execute an arithmetic right-shift of a word by a variable
@@ -333,9 +948,18 @@ def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIAlu>, ArithLogic16Defs<0>;
 // Purpose: Store Word (Extended)
 // To store a word to memory.
 //
-def SwRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11011, "sw", mem16, IIAlu>;
+def SwRxRyOffMemX16:
+  FEXT_RRI16_mem2_ins<0b11011, "sw", mem16, IIStore>, MayStore;
 
 //
+// Format: SW rx, offset(sp)                             MIPS16e
+// Purpose: Store Word rx (SP-Relative)
+// To store an SP-relative word to memory.
+// +def SwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b11010, "sw", IIStore>, MayStore; + +// +// // Format: XOR rx, ry MIPS16e // Purpose: Xor // To do a bitwise logical XOR. @@ -361,6 +985,7 @@ class ArithLogic16_pat<SDNode OpNode, Instruction I> : def: ArithLogic16_pat<add, AdduRxRyRz16>; def: ArithLogic16_pat<and, AndRxRxRy16>; +def: ArithLogic16_pat<mul, MultRxRyRz16>; def: ArithLogic16_pat<or, OrRxRxRy16>; def: ArithLogic16_pat<sub, SubuRxRyRz16>; def: ArithLogic16_pat<xor, XorRxRxRy16>; @@ -385,35 +1010,533 @@ def: shift_rotate_reg16_pat<sra, SravRxRy16>; def: shift_rotate_reg16_pat<srl, SrlvRxRy16>; class LoadM16_pat<PatFrag OpNode, Instruction I> : - Mips16Pat<(OpNode addr:$addr), (I addr:$addr)>; + Mips16Pat<(OpNode addr16:$addr), (I addr16:$addr)>; def: LoadM16_pat<sextloadi8, LbRxRyOffMemX16>; def: LoadM16_pat<zextloadi8, LbuRxRyOffMemX16>; -def: LoadM16_pat<sextloadi16_a, LhRxRyOffMemX16>; -def: LoadM16_pat<zextloadi16_a, LhuRxRyOffMemX16>; -def: LoadM16_pat<load_a, LwRxRyOffMemX16>; +def: LoadM16_pat<sextloadi16, LhRxRyOffMemX16>; +def: LoadM16_pat<zextloadi16, LhuRxRyOffMemX16>; +def: LoadM16_pat<load, LwRxRyOffMemX16>; class StoreM16_pat<PatFrag OpNode, Instruction I> : - Mips16Pat<(OpNode CPU16Regs:$r, addr:$addr), (I CPU16Regs:$r, addr:$addr)>; + Mips16Pat<(OpNode CPU16Regs:$r, addr16:$addr), + (I CPU16Regs:$r, addr16:$addr)>; def: StoreM16_pat<truncstorei8, SbRxRyOffMemX16>; -def: StoreM16_pat<truncstorei16_a, ShRxRyOffMemX16>; -def: StoreM16_pat<store_a, SwRxRyOffMemX16>; +def: StoreM16_pat<truncstorei16, ShRxRyOffMemX16>; +def: StoreM16_pat<store, SwRxRyOffMemX16>; + +// Unconditional branch +class UncondBranch16_pat<SDNode OpNode, Instruction I>: + Mips16Pat<(OpNode bb:$imm16), (I bb:$imm16)> { + let Predicates = [RelocPIC, InMips16Mode]; + } + +// Indirect branch +def: Mips16Pat< + (brind CPU16Regs:$rs), + (JrcRx16 CPU16Regs:$rs)>; // Jump and Link (Call) -let isCall=1, hasDelaySlot=1 in +let isCall=1, hasDelaySlot=0 in def JumpLinkReg16: FRR16_JALRC<0, 0, 0, (outs), (ins CPU16Regs:$rs), - "jalr \t$rs", [(MipsJmpLink CPU16Regs:$rs)], IIBranch>; + "jalrc \t$rs", [(MipsJmpLink CPU16Regs:$rs)], IIBranch>; // Mips16 pseudos let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1, hasExtraSrcRegAllocReq = 1 in def RetRA16 : MipsPseudo16<(outs), (ins), "", [(MipsRet)]>; + +// setcc patterns + +class SetCC_R16<PatFrag cond_op, Instruction I>: + Mips16Pat<(cond_op CPU16Regs:$rx, CPU16Regs:$ry), + (I CPU16Regs:$rx, CPU16Regs:$ry)>; + +class SetCC_I16<PatFrag cond_op, PatLeaf imm_type, Instruction I>: + Mips16Pat<(cond_op CPU16Regs:$rx, imm_type:$imm16), + (I CPU16Regs:$rx, imm_type:$imm16)>; + + +def: Mips16Pat<(i32 addr16:$addr), + (AddiuRxRyOffMemX16 addr16:$addr)>; + + +// Large (>16 bit) immediate loads +def : Mips16Pat<(i32 imm:$imm), + (OrRxRxRy16 (SllX16 (LiRxImmX16 (HI16 imm:$imm)), 16), + (LiRxImmX16 (LO16 imm:$imm)))>; + +// Carry MipsPatterns +def : Mips16Pat<(subc CPU16Regs:$lhs, CPU16Regs:$rhs), + (SubuRxRyRz16 CPU16Regs:$lhs, CPU16Regs:$rhs)>; +def : Mips16Pat<(addc CPU16Regs:$lhs, CPU16Regs:$rhs), + (AdduRxRyRz16 CPU16Regs:$lhs, CPU16Regs:$rhs)>; +def : Mips16Pat<(addc CPU16Regs:$src, immSExt16:$imm), + (AddiuRxRxImmX16 CPU16Regs:$src, imm:$imm)>; + +// +// Some branch conditional patterns are not generated by llvm at this time. +// Some are for seemingly arbitrary reasons not used: i.e. with signed number +// comparison they are used and for unsigned a different pattern is used. 
+// I am pushing upstream from the full mips16 port, and it seemed that I needed
+// these earlier, and the mips32 port has these, but now I cannot create test
+// cases that use these patterns. While I sort this all out I will leave these
+// extra patterns commented out; if I can be sure they are really not used,
+// I will delete the code. I don't want to check in the code uncommented without
+// a valid test case. In some cases, the compiler is generating patterns with
+// setcc instead, and since I had implemented setcc first, that may have masked
+// the problem. The setcc variants are suboptimal for mips16, so I may want to
+// figure out how to enable the brcond patterns, or else possibly new
+// combinations of brcond and setcc.
+//
+//
+// bcond-seteq
+//
+def: Mips16Pat
+  <(brcond (i32 (seteq CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+   (BteqzT8CmpX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+  >;
+
+
+def: Mips16Pat
+  <(brcond (i32 (seteq CPU16Regs:$rx, immZExt16:$imm)), bb:$targ16),
+   (BteqzT8CmpiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$targ16)
+  >;
+
+def: Mips16Pat
+  <(brcond (i32 (seteq CPU16Regs:$rx, 0)), bb:$targ16),
+   (BeqzRxImmX16 CPU16Regs:$rx, bb:$targ16)
+  >;
+
+//
+// bcond-setgt (do we need to have this pair of setlt, setgt??)
+//
+def: Mips16Pat
+  <(brcond (i32 (setgt CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+   (BtnezT8SltX16 CPU16Regs:$ry, CPU16Regs:$rx, bb:$imm16)
+  >;
+
+//
+// bcond-setge
+//
+def: Mips16Pat
+  <(brcond (i32 (setge CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+   (BteqzT8SltX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+  >;
+
+//
+// Never called, because the compiler transforms a >= k to a > (k-1).
+def: Mips16Pat
+  <(brcond (i32 (setge CPU16Regs:$rx, immSExt16:$imm)), bb:$imm16),
+   (BteqzT8SltiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$imm16)
+  >;
+
+//
+// bcond-setlt
+//
+def: Mips16Pat
+  <(brcond (i32 (setlt CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+   (BtnezT8SltX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+  >;
+
+def: Mips16Pat
+  <(brcond (i32 (setlt CPU16Regs:$rx, immSExt16:$imm)), bb:$imm16),
+   (BtnezT8SltiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$imm16)
+  >;
+
+//
+// bcond-setle
+//
+def: Mips16Pat
+  <(brcond (i32 (setle CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+   (BteqzT8SltX16 CPU16Regs:$ry, CPU16Regs:$rx, bb:$imm16)
+  >;
+
+//
+// bcond-setne
+//
+def: Mips16Pat
+  <(brcond (i32 (setne CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+   (BtnezT8CmpX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+  >;
+
+def: Mips16Pat
+  <(brcond (i32 (setne CPU16Regs:$rx, immZExt16:$imm)), bb:$targ16),
+   (BtnezT8CmpiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$targ16)
+  >;
+
+def: Mips16Pat
+  <(brcond (i32 (setne CPU16Regs:$rx, 0)), bb:$targ16),
+   (BnezRxImmX16 CPU16Regs:$rx, bb:$targ16)
+  >;
+
+//
+// This needs to be there, but I forget which code will generate it.
+//
+def: Mips16Pat
+  <(brcond CPU16Regs:$rx, bb:$targ16),
+   (BnezRxImmX16 CPU16Regs:$rx, bb:$targ16)
+  >;
+
+//
+
+//
+// bcond-setugt
+//
+//def: Mips16Pat
+//  <(brcond (i32 (setugt CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+//   (BtnezT8SltuX16 CPU16Regs:$ry, CPU16Regs:$rx, bb:$imm16)
+//  >;
+
+//
+// bcond-setuge
+//
+//def: Mips16Pat
+//  <(brcond (i32 (setuge CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+//   (BteqzT8SltuX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+//  >;
+
+
+//
+// bcond-setult
+//
+//def: Mips16Pat
+//  <(brcond (i32 (setult CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+//   (BtnezT8SltuX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+//  >;
+
+def: UncondBranch16_pat<br, BimmX16>;
+
 // Small immediates
+def: Mips16Pat<(i32 immSExt16:$in),
+  (AddiuRxRxImmX16 (Move32R16 ZERO), immSExt16:$in)>;
+
 def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>;
 
+//
+// MipsDivRem
+//
+def: Mips16Pat
+  <(MipsDivRem CPU16Regs:$rx, CPU16Regs:$ry),
+   (DivRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>;
+
+//
+// MipsDivRemU
+//
+def: Mips16Pat
+  <(MipsDivRemU CPU16Regs:$rx, CPU16Regs:$ry),
+   (DivuRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>;
+
+// signed a,b
+// x = (a>=b)?x:y
+//
+// if !(a < b) x = y
+//
+def : Mips16Pat<(select (i32 (setge CPU16Regs:$a, CPU16Regs:$b)),
+                 CPU16Regs:$x, CPU16Regs:$y),
+                (SelTBteqZSlt CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$a, CPU16Regs:$b)>;
+
+// signed a,b
+// x = (a>b)?x:y
+//
+// if (b < a) x = y
+//
+def : Mips16Pat<(select (i32 (setgt CPU16Regs:$a, CPU16Regs:$b)),
+                 CPU16Regs:$x, CPU16Regs:$y),
+                (SelTBtneZSlt CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$b, CPU16Regs:$a)>;
+
+// unsigned a,b
+// x = (a>=b)?x:y
+//
+// if !(a < b) x = y;
+//
+def : Mips16Pat<
+  (select (i32 (setuge CPU16Regs:$a, CPU16Regs:$b)),
+   CPU16Regs:$x, CPU16Regs:$y),
+  (SelTBteqZSltu CPU16Regs:$x, CPU16Regs:$y,
+   CPU16Regs:$a, CPU16Regs:$b)>;
+
+// unsigned a,b
+// x = (a>b)?x:y
+//
+// if (b < a) x = y
+//
+def : Mips16Pat<(select (i32 (setugt CPU16Regs:$a, CPU16Regs:$b)),
+                 CPU16Regs:$x, CPU16Regs:$y),
+                (SelTBtneZSltu CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$b, CPU16Regs:$a)>;
+
+// signed
+// x = (a >= k)?x:y
+// Due to an llvm optimization, I don't think that this will ever
+// be used. This is transformed into x = (a > k-1)?x:y
+//
+//
+
+//def : Mips16Pat<
+//  (select (i32 (setge CPU16Regs:$lhs, immSExt16:$rhs)),
+//   CPU16Regs:$T, CPU16Regs:$F),
+//  (SelTBteqZSlti CPU16Regs:$T, CPU16Regs:$F,
+//   CPU16Regs:$lhs, immSExt16:$rhs)>;
+
+//def : Mips16Pat<
+//  (select (i32 (setuge CPU16Regs:$lhs, immSExt16:$rhs)),
+//   CPU16Regs:$T, CPU16Regs:$F),
+//  (SelTBteqZSltiu CPU16Regs:$T, CPU16Regs:$F,
+//   CPU16Regs:$lhs, immSExt16:$rhs)>;
+
+// signed
+// x = (a < k)?x:y
+//
+// if !(a < k) x = y;
+//
+def : Mips16Pat<
+  (select (i32 (setlt CPU16Regs:$a, immSExt16:$b)),
+   CPU16Regs:$x, CPU16Regs:$y),
+  (SelTBtneZSlti CPU16Regs:$x, CPU16Regs:$y,
+   CPU16Regs:$a, immSExt16:$b)>;
+
+
+//
+//
+// signed
+// x = (a <= b)? x : y
+//
+// if (b < a) x = y
+//
+def : Mips16Pat<(select (i32 (setle CPU16Regs:$a, CPU16Regs:$b)),
+                 CPU16Regs:$x, CPU16Regs:$y),
+                (SelTBteqZSlt CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$b, CPU16Regs:$a)>;
+
+//
+// unsigned
+// x = (a <= b)? x : y
+//
+// if (b < a) x = y
+//
+def : Mips16Pat<(select (i32 (setule CPU16Regs:$a, CPU16Regs:$b)),
+                 CPU16Regs:$x, CPU16Regs:$y),
+                (SelTBteqZSltu CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$b, CPU16Regs:$a)>;
+
+//
+// signed/unsigned
+// x = (a == b)? x : y
+//
+// if (a != b) x = y
+//
+def : Mips16Pat<(select (i32 (seteq CPU16Regs:$a, CPU16Regs:$b)),
+                 CPU16Regs:$x, CPU16Regs:$y),
+                (SelTBteqZCmp CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$b, CPU16Regs:$a)>;
+
+//
+// signed/unsigned
+// x = (a == 0)? x : y
+//
+// if (a != 0) x = y
+//
+def : Mips16Pat<(select (i32 (seteq CPU16Regs:$a, 0)),
+                 CPU16Regs:$x, CPU16Regs:$y),
+                (SelBeqZ CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$a)>;
+
+
+//
+// signed/unsigned
+// x = (a == k)? x : y
+//
+// if (a != k) x = y
+//
+def : Mips16Pat<(select (i32 (seteq CPU16Regs:$a, immZExt16:$k)),
+                 CPU16Regs:$x, CPU16Regs:$y),
+                (SelTBteqZCmpi CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$a, immZExt16:$k)>;
+
+
+//
+// signed/unsigned
+// x = (a != b)? x : y
+//
+// if (a == b) x = y
+//
+//
+def : Mips16Pat<(select (i32 (setne CPU16Regs:$a, CPU16Regs:$b)),
+                 CPU16Regs:$x, CPU16Regs:$y),
+                (SelTBtneZCmp CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$b, CPU16Regs:$a)>;
+
+//
+// signed/unsigned
+// x = (a != 0)? x : y
+//
+// if (a == 0) x = y
+//
+def : Mips16Pat<(select (i32 (setne CPU16Regs:$a, 0)),
+                 CPU16Regs:$x, CPU16Regs:$y),
+                (SelBneZ CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$a)>;
+
+// signed/unsigned
+// x = (a)? x : y
+//
+// if (!a) x = y
+//
+def : Mips16Pat<(select CPU16Regs:$a,
+                 CPU16Regs:$x, CPU16Regs:$y),
+                (SelBneZ CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$a)>;
+
+
+//
+// signed/unsigned
+// x = (a != k)? x : y
+//
+// if (a == k) x = y
+//
+def : Mips16Pat<(select (i32 (setne CPU16Regs:$a, immZExt16:$k)),
+                 CPU16Regs:$x, CPU16Regs:$y),
+                (SelTBtneZCmpi CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$a, immZExt16:$k)>;
+
+//
+// When writing C code to test these setxx patterns, some will be transformed
+// into other things, so we test the C code at both -O3 and -O0.
+//
+// seteq
+//
+def : Mips16Pat
+  <(seteq CPU16Regs:$lhs,CPU16Regs:$rhs),
+   (SltiuCCRxImmX16 (XorRxRxRy16 CPU16Regs:$lhs, CPU16Regs:$rhs), 1)>;
+
+def : Mips16Pat
+  <(seteq CPU16Regs:$lhs, 0),
+   (SltiuCCRxImmX16 CPU16Regs:$lhs, 1)>;
+
+
+//
+// setge
+//
+
+def: Mips16Pat
+  <(setge CPU16Regs:$lhs, CPU16Regs:$rhs),
+   (XorRxRxRy16 (SltCCRxRy16 CPU16Regs:$lhs, CPU16Regs:$rhs),
+   (LiRxImmX16 1))>;
+
+//
+// For constants, llvm transforms this to:
+// x > (k - 1) and then reverses the operands to use setlt. So this pattern
+// is not used now by the compiler. (Presumably checking that k-1 does not
+// overflow.) The compiler never uses this at the current time, due to
+// other optimizations.
+//
+//def: Mips16Pat
+//  <(setge CPU16Regs:$lhs, immSExt16:$rhs),
+//   (XorRxRxRy16 (SltiCCRxImmX16 CPU16Regs:$lhs, immSExt16:$rhs),
+//   (LiRxImmX16 1))>;
+
+// This catches the x >= -32768 case by transforming it to x > -32769.
+//
+def: Mips16Pat
+  <(setgt CPU16Regs:$lhs, -32769),
+   (XorRxRxRy16 (SltiCCRxImmX16 CPU16Regs:$lhs, -32768),
+   (LiRxImmX16 1))>;
+
+//
+// setgt
+//
+//
+
+def: Mips16Pat
+  <(setgt CPU16Regs:$lhs, CPU16Regs:$rhs),
+   (SltCCRxRy16 CPU16Regs:$rhs, CPU16Regs:$lhs)>;
+
+//
+// setle
+//
+def: Mips16Pat
+  <(setle CPU16Regs:$lhs, CPU16Regs:$rhs),
+   (XorRxRxRy16 (SltCCRxRy16 CPU16Regs:$rhs, CPU16Regs:$lhs), (LiRxImmX16 1))>;
+
+//
+// setlt
+//
+def: SetCC_R16<setlt, SltCCRxRy16>;
+
+def: SetCC_I16<setlt, immSExt16, SltiCCRxImmX16>;
+
+//
+// setne
+//
+def : Mips16Pat
+  <(setne CPU16Regs:$lhs,CPU16Regs:$rhs),
+   (SltuCCRxRy16 (LiRxImmX16 0),
+   (XorRxRxRy16 CPU16Regs:$lhs, CPU16Regs:$rhs))>;
+
+
+//
+// setuge
+//
+def: Mips16Pat
+  <(setuge CPU16Regs:$lhs, CPU16Regs:$rhs),
+   (XorRxRxRy16 (SltuCCRxRy16 CPU16Regs:$lhs, CPU16Regs:$rhs),
+   (LiRxImmX16 1))>;
+
+// This pattern will never be used, because the compiler will transform
+// x >= k to x > (k - 1) and then use SLT.
+//
+//def: Mips16Pat
+//  <(setuge CPU16Regs:$lhs, immZExt16:$rhs),
+//   (XorRxRxRy16 (SltiuCCRxImmX16 CPU16Regs:$lhs, immZExt16:$rhs),
+//   (LiRxImmX16 1))>;
+
+//
+// setugt
+//
+def: Mips16Pat
+  <(setugt CPU16Regs:$lhs, CPU16Regs:$rhs),
+   (SltuCCRxRy16 CPU16Regs:$rhs, CPU16Regs:$lhs)>;
+
+//
+// setule
+//
+def: Mips16Pat
+  <(setule CPU16Regs:$lhs, CPU16Regs:$rhs),
+   (XorRxRxRy16 (SltuCCRxRy16 CPU16Regs:$rhs, CPU16Regs:$lhs), (LiRxImmX16 1))>;
+
+//
+// setult
+//
+def: SetCC_R16<setult, SltuCCRxRy16>;
+
+def: SetCC_I16<setult, immSExt16, SltiuCCRxImmX16>;
+
 def: Mips16Pat<(add CPU16Regs:$hi,
(MipsLo tglobaladdr:$lo)), (AddiuRxRxImmX16 CPU16Regs:$hi, tglobaladdr:$lo)>; + +// hi/lo relocs + +def : Mips16Pat<(MipsHi tglobaltlsaddr:$in), + (SllX16 (LiRxImmX16 tglobaltlsaddr:$in), 16)>; + +// wrapper_pic +class Wrapper16Pat<SDNode node, Instruction ADDiuOp, RegisterClass RC>: + Mips16Pat<(MipsWrapper RC:$gp, node:$in), + (ADDiuOp RC:$gp, node:$in)>; + + +def : Wrapper16Pat<tglobaladdr, AddiuRxRxImmX16, CPU16Regs>; +def : Wrapper16Pat<tglobaltlsaddr, AddiuRxRxImmX16, CPU16Regs>; + +def : Mips16Pat<(i32 (extloadi8 addr16:$src)), + (LbuRxRyOffMemX16 addr16:$src)>; +def : Mips16Pat<(i32 (extloadi16 addr16:$src)), + (LhuRxRyOffMemX16 addr16:$src)>;
\ No newline at end of file diff --git a/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp index c15d1bf..d7397a3 100644 --- a/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "Mips16RegisterInfo.h" +#include "Mips16InstrInfo.h" #include "Mips.h" #include "MipsAnalyzeImmediate.h" #include "MipsInstrInfo.h" @@ -39,15 +40,27 @@ using namespace llvm; Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST, - const TargetInstrInfo &TII) - : MipsRegisterInfo(ST, TII) {} + const Mips16InstrInfo &I) + : MipsRegisterInfo(ST), TII(I) {} // This function eliminate ADJCALLSTACKDOWN, // ADJCALLSTACKUP pseudo instructions void Mips16RegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (!TFI->hasReservedCallFrame(MF)) { + int64_t Amount = I->getOperand(0).getImm(); + + if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) + Amount = -Amount; + + const Mips16InstrInfo *II = static_cast<const Mips16InstrInfo*>(&TII); + + II->adjustStackPtr(Mips::SP, Amount, MBB, I); + } + MBB.erase(I); } @@ -55,57 +68,60 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, int64_t SPOffset) const { - MachineInstr &MI = *II; - MachineFunction &MF = *MI.getParent()->getParent(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - int MinCSFI = 0; - int MaxCSFI = -1; - - if (CSI.size()) { - MinCSFI = CSI[0].getFrameIdx(); - MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); - } - - // The following stack frame objects are always - // referenced relative to $sp: - // 1. Outgoing arguments. - // 2. Pointer to dynamically allocated stack space. - // 3. Locations for callee-saved registers. - // Everything else is referenced relative to whatever register - // getFrameRegister() returns. - unsigned FrameReg; - - if (MipsFI->isOutArgFI(FrameIndex) || - (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) - FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + // The following stack frame objects are always + // referenced relative to $sp: + // 1. Outgoing arguments. + // 2. Pointer to dynamically allocated stack space. + // 3. Locations for callee-saved registers. + // Everything else is referenced relative to whatever register + // getFrameRegister() returns. 
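The code that follows implements exactly this choice. Condensed into a standalone C++ sketch (hypothetical names; the real code reads the register class and machine operands instead of booleans):

    #include <cstdio>

    enum Reg { SP, S0, OPERAND_REG };

    // Callee-saved slots are always SP-relative; with a frame pointer,
    // everything else hangs off S0; otherwise use a register operand
    // supplied on the instruction, falling back to SP.
    Reg pickFrameReg(bool inCalleeSavedRange, bool hasFP, bool hasRegOperand) {
      if (inCalleeSavedRange) return SP;
      if (hasFP) return S0;
      return hasRegOperand ? OPERAND_REG : SP;
    }

    int main() {
      printf("%d %d %d\n", pickFrameReg(true, true, false),
             pickFrameReg(false, true, false),
             pickFrameReg(false, false, false)); // prints 0 1 0
    }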
+ unsigned FrameReg; + + if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) + FrameReg = Mips::SP; + else { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + if (TFI->hasFP(MF)) { + FrameReg = Mips::S0; + } + else { + if ((MI.getNumOperands()> OpNo+2) && MI.getOperand(OpNo+2).isReg()) + FrameReg = MI.getOperand(OpNo+2).getReg(); else - FrameReg = getFrameRegister(MF); - - // Calculate final offset. - // - There is no need to change the offset if the frame object - // is one of the - // following: an outgoing argument, pointer to a dynamically allocated - // stack space or a $gp restore location, - // - If the frame object is any of the following, - // its offset must be adjusted - // by adding the size of the stack: - // incoming argument, callee-saved register location or local variable. - int64_t Offset; - - if (MipsFI->isOutArgFI(FrameIndex)) - Offset = SPOffset; - else - Offset = SPOffset + (int64_t)StackSize; - - Offset += MI.getOperand(OpNo + 1).getImm(); - - DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); - - MI.getOperand(OpNo).ChangeToRegister(FrameReg, false); - MI.getOperand(OpNo + 1).ChangeToImmediate(Offset); + FrameReg = Mips::SP; + } + } + // Calculate final offset. + // - There is no need to change the offset if the frame object + // is one of the + // following: an outgoing argument, pointer to a dynamically allocated + // stack space or a $gp restore location, + // - If the frame object is any of the following, + // its offset must be adjusted + // by adding the size of the stack: + // incoming argument, callee-saved register location or local variable. + int64_t Offset; + Offset = SPOffset + (int64_t)StackSize; + Offset += MI.getOperand(OpNo + 1).getImm(); + + + DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); + + MI.getOperand(OpNo).ChangeToRegister(FrameReg, false); + MI.getOperand(OpNo + 1).ChangeToImmediate(Offset); } diff --git a/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.h b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.h index 3f4b3a7..153def2 100644 --- a/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.h +++ b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.h @@ -17,11 +17,12 @@ #include "MipsRegisterInfo.h" namespace llvm { +class Mips16InstrInfo; class Mips16RegisterInfo : public MipsRegisterInfo { + const Mips16InstrInfo &TII; public: - Mips16RegisterInfo(const MipsSubtarget &Subtarget, - const TargetInstrInfo &TII); + Mips16RegisterInfo(const MipsSubtarget &Subtarget, const Mips16InstrInfo &TII); void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td index 20fc178..a611168 100644 --- a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td @@ -83,8 +83,10 @@ let usesCustomInserter = 1, Predicates = [HasMips64, HasStandardEncoding], //===----------------------------------------------------------------------===// let DecoderNamespace = "Mips64" in { /// Arithmetic Instructions (ALU Immediate) -def DADDiu : ArithLogicI<0x19, "daddiu", add, simm16_64, immSExt16, +def DADDi : ArithOverflowI<0x18, "daddi", add, simm16_64, immSExt16, CPU64Regs>; +def DADDiu : ArithLogicI<0x19, "daddiu", add, simm16_64, immSExt16, + CPU64Regs>, IsAsCheapAsAMove; def DANDi : ArithLogicI<0x0c, "andi", and, uimm16_64, immZExt16, CPU64Regs>; def SLTi64 : SetCC_I<0x0a, "slti", setlt, simm16_64, immSExt16, CPU64Regs>; def SLTiu64 : 
SetCC_I<0x0b, "sltiu", setult, simm16_64, immSExt16, CPU64Regs>; @@ -93,6 +95,7 @@ def XORi64 : ArithLogicI<0x0e, "xori", xor, uimm16_64, immZExt16, CPU64Regs>; def LUi64 : LoadUpper<0x0f, "lui", CPU64Regs, uimm16_64>; /// Arithmetic Instructions (3-Operand, R-Type) +def DADD : ArithOverflowR<0x00, 0x2C, "dadd", IIAlu, CPU64Regs, 1>; def DADDu : ArithLogicR<0x00, 0x2d, "daddu", add, IIAlu, CPU64Regs, 1>; def DSUBu : ArithLogicR<0x00, 0x2f, "dsubu", sub, IIAlu, CPU64Regs>; def SLT64 : SetCC_R<0x00, 0x2a, "slt", setlt, CPU64Regs>; @@ -110,9 +113,9 @@ def DSLLV : shift_rotate_reg<0x14, 0x00, "dsllv", shl, CPU64Regs>; def DSRLV : shift_rotate_reg<0x16, 0x00, "dsrlv", srl, CPU64Regs>; def DSRAV : shift_rotate_reg<0x17, 0x00, "dsrav", sra, CPU64Regs>; let Pattern = []<dag> in { -def DSLL32 : shift_rotate_imm64<0x3c, 0x00, "dsll32", shl>; -def DSRL32 : shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl>; -def DSRA32 : shift_rotate_imm64<0x3f, 0x00, "dsra32", sra>; + def DSLL32 : shift_rotate_imm64<0x3c, 0x00, "dsll32", shl>; + def DSRL32 : shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl>; + def DSRA32 : shift_rotate_imm64<0x3f, 0x00, "dsra32", sra>; } } // Rotate Instructions @@ -127,24 +130,15 @@ let DecoderNamespace = "Mips64" in { /// aligned defm LB64 : LoadM64<0x20, "lb", sextloadi8>; defm LBu64 : LoadM64<0x24, "lbu", zextloadi8>; -defm LH64 : LoadM64<0x21, "lh", sextloadi16_a>; -defm LHu64 : LoadM64<0x25, "lhu", zextloadi16_a>; -defm LW64 : LoadM64<0x23, "lw", sextloadi32_a>; -defm LWu64 : LoadM64<0x27, "lwu", zextloadi32_a>; +defm LH64 : LoadM64<0x21, "lh", sextloadi16>; +defm LHu64 : LoadM64<0x25, "lhu", zextloadi16>; +defm LW64 : LoadM64<0x23, "lw", sextloadi32>; +defm LWu64 : LoadM64<0x27, "lwu", zextloadi32>; defm SB64 : StoreM64<0x28, "sb", truncstorei8>; -defm SH64 : StoreM64<0x29, "sh", truncstorei16_a>; -defm SW64 : StoreM64<0x2b, "sw", truncstorei32_a>; -defm LD : LoadM64<0x37, "ld", load_a>; -defm SD : StoreM64<0x3f, "sd", store_a>; - -/// unaligned -defm ULH64 : LoadM64<0x21, "ulh", sextloadi16_u, 1>; -defm ULHu64 : LoadM64<0x25, "ulhu", zextloadi16_u, 1>; -defm ULW64 : LoadM64<0x23, "ulw", sextloadi32_u, 1>; -defm USH64 : StoreM64<0x29, "ush", truncstorei16_u, 1>; -defm USW64 : StoreM64<0x2b, "usw", truncstorei32_u, 1>; -defm ULD : LoadM64<0x37, "uld", load_u, 1>; -defm USD : StoreM64<0x3f, "usd", store_u, 1>; +defm SH64 : StoreM64<0x29, "sh", truncstorei16>; +defm SW64 : StoreM64<0x2b, "sw", truncstorei32>; +defm LD : LoadM64<0x37, "ld", load>; +defm SD : StoreM64<0x3f, "sd", store>; /// load/store left/right let isCodeGenOnly = 1 in { @@ -183,6 +177,7 @@ def BLTZ64 : CBranchZero<0x01, 0, "bltz", setlt, CPU64Regs>; } let DecoderNamespace = "Mips64" in def JALR64 : JumpLinkReg<0x00, 0x09, "jalr", CPU64Regs>; +def TAILCALL64_R : JumpFR<CPU64Regs, MipsTailCall>, IsTailCall; let DecoderNamespace = "Mips64" in { /// Multiply and Divide Instructions. 
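The DSLL32/DSRL32/DSRA32 definitions above exist because the MIPS shift field is only five bits wide: the *32 forms implicitly add 32 to the encoded amount. That identity is what the old LowerLargeShift path (removed further down in this patch) relied on when it rewrote a 64-bit shift by s > 31 as the *32 form with amount s - 32. A quick standalone C++ check of the arithmetic (illustrative only, not LLVM code):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // "dsll rd, rt, sa"   encodes sa in 0..31
    uint64_t dsll(uint64_t rt, unsigned sa)   { return rt << sa; }
    // "dsll32 rd, rt, sa" shifts by sa + 32
    uint64_t dsll32(uint64_t rt, unsigned sa) { return rt << (sa + 32); }

    int main() {
      uint64_t x = 0xbeef;
      unsigned s = 36;                     // a "large" shift amount
      assert(dsll32(x, s - 32) == x << s); // dsll32 with s-32 == shift by s
      printf("0x%016llx\n", (unsigned long long)dsll32(x, s - 32));
    }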
@@ -217,7 +212,15 @@ let DecoderNamespace = "Mips64" in { def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>; def DEXT : ExtBase<3, "dext", CPU64Regs>; +let Pattern = []<dag> in { + def DEXTU : ExtBase<2, "dextu", CPU64Regs>; + def DEXTM : ExtBase<1, "dextm", CPU64Regs>; +} def DINS : InsBase<7, "dins", CPU64Regs>; +let Pattern = []<dag> in { + def DINSU : InsBase<6, "dinsu", CPU64Regs>; + def DINSM : InsBase<5, "dinsm", CPU64Regs>; +} let isCodeGenOnly = 1, rs = 0, shamt = 0 in { def DSLL64_32 : FR<0x00, 0x3c, (outs CPU64Regs:$rd), (ins CPURegs:$rt), @@ -236,21 +239,14 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in { let Predicates = [NotN64, HasStandardEncoding] in { def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>; def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>; - def : MipsPat<(i64 (extloadi16_a addr:$src)), (LH64 addr:$src)>; - def : MipsPat<(i64 (extloadi16_u addr:$src)), (ULH64 addr:$src)>; - def : MipsPat<(i64 (extloadi32_a addr:$src)), (LW64 addr:$src)>; - def : MipsPat<(i64 (extloadi32_u addr:$src)), (ULW64 addr:$src)>; - def : MipsPat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>; + def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64 addr:$src)>; + def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64 addr:$src)>; } let Predicates = [IsN64, HasStandardEncoding] in { def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64_P8 addr:$src)>; def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64_P8 addr:$src)>; - def : MipsPat<(i64 (extloadi16_a addr:$src)), (LH64_P8 addr:$src)>; - def : MipsPat<(i64 (extloadi16_u addr:$src)), (ULH64_P8 addr:$src)>; - def : MipsPat<(i64 (extloadi32_a addr:$src)), (LW64_P8 addr:$src)>; - def : MipsPat<(i64 (extloadi32_u addr:$src)), (ULW64_P8 addr:$src)>; - def : MipsPat<(zextloadi32_u addr:$a), - (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>; + def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64_P8 addr:$src)>; + def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64_P8 addr:$src)>; } // hi/lo relocs @@ -315,3 +311,38 @@ def : MipsPat<(i64 (sext_inreg CPU64Regs:$src, i32)), // bswap MipsPattern def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>; + +//===----------------------------------------------------------------------===// +// Instruction aliases +//===----------------------------------------------------------------------===// +def : InstAlias<"move $dst,$src", (DADD CPU64Regs:$dst,CPU64Regs:$src,ZERO_64)>; + +/// Move between CPU and coprocessor registers +let DecoderNamespace = "Mips64" in { +def MFC0_3OP64 : MFC3OP<0x10, 0, (outs CPU64Regs:$rt), + (ins CPU64Regs:$rd, uimm16:$sel),"mfc0\t$rt, $rd, $sel">; +def MTC0_3OP64 : MFC3OP<0x10, 4, (outs CPU64Regs:$rd, uimm16:$sel), + (ins CPU64Regs:$rt),"mtc0\t$rt, $rd, $sel">; +def MFC2_3OP64 : MFC3OP<0x12, 0, (outs CPU64Regs:$rt), + (ins CPU64Regs:$rd, uimm16:$sel),"mfc2\t$rt, $rd, $sel">; +def MTC2_3OP64 : MFC3OP<0x12, 4, (outs CPU64Regs:$rd, uimm16:$sel), + (ins CPU64Regs:$rt),"mtc2\t$rt, $rd, $sel">; +def DMFC0_3OP64 : MFC3OP<0x10, 1, (outs CPU64Regs:$rt), + (ins CPU64Regs:$rd, uimm16:$sel),"dmfc0\t$rt, $rd, $sel">; +def DMTC0_3OP64 : MFC3OP<0x10, 5, (outs CPU64Regs:$rd, uimm16:$sel), + (ins CPU64Regs:$rt),"dmtc0\t$rt, $rd, $sel">; +def DMFC2_3OP64 : MFC3OP<0x12, 1, (outs CPU64Regs:$rt), + (ins CPU64Regs:$rd, uimm16:$sel),"dmfc2\t$rt, $rd, $sel">; +def DMTC2_3OP64 : MFC3OP<0x12, 5, (outs CPU64Regs:$rd, uimm16:$sel), + (ins CPU64Regs:$rt),"dmtc2\t$rt, $rd, $sel">; +} +// Two operand (implicit 0 selector) versions: +def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP64 
CPU64Regs:$rt, CPU64Regs:$rd, 0)>; +def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; +def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; +def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; +def : InstAlias<"dmfc0 $rt, $rd", (DMFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; +def : InstAlias<"dmtc0 $rt, $rd", (DMTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; +def : InstAlias<"dmfc2 $rt, $rd", (DMFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; +def : InstAlias<"dmtc2 $rt, $rd", (DMTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; + diff --git a/contrib/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/contrib/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp index dc8fbd0..99b163e 100644 --- a/contrib/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp @@ -91,7 +91,7 @@ void MipsAnalyzeImmediate::ReplaceADDiuSLLWithLUi(InstSeq &Seq) { // Sign-extend and shift operand of ADDiu and see if it still fits in 16-bit. int64_t Imm = SignExtend64<16>(Seq[0].ImmOpnd); - int64_t ShiftedImm = Imm << (Seq[1].ImmOpnd - 16); + int64_t ShiftedImm = (uint64_t)Imm << (Seq[1].ImmOpnd - 16); if (!isInt<16>(ShiftedImm)) return; diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp index 00ff754..bf2818d 100644 --- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp @@ -37,7 +37,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" @@ -49,6 +49,13 @@ bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) { return true; } +bool MipsAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) { + MCOp = MCInstLowering.LowerOperand(MO); + return MCOp.isValid(); +} + +#include "MipsGenMCPseudoLowering.inc" + void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (MI->isDebugValue()) { SmallString<128> Str; @@ -58,24 +65,9 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } - // Direct object specific instruction lowering - if (!OutStreamer.hasRawTextSupport()) - switch (MI->getOpcode()) { - case Mips::DSLL: - case Mips::DSRL: - case Mips::DSRA: - assert(MI->getNumOperands() == 3 && - "Invalid no. of machine operands for shift!"); - assert(MI->getOperand(2).isImm()); - int64_t Shift = MI->getOperand(2).getImm(); - if (Shift > 31) { - MCInst TmpInst0; - MCInstLowering.LowerLargeShift(MI, TmpInst0, Shift - 32); - OutStreamer.EmitInstruction(TmpInst0); - return; - } - break; - } + // Do any auto-generated pseudo lowerings. 
+ if (emitPseudoExpansionLowering(OutStreamer, MI)) + return; MachineBasicBlock::const_instr_iterator I = MI; MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); @@ -83,8 +75,9 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { do { MCInst TmpInst0; MCInstLowering.Lower(I++, TmpInst0); + OutStreamer.EmitInstruction(TmpInst0); - } while ((I != E) && I->isInsideBundle()); + } while ((I != E) && I->isInsideBundle()); // Delay slot check } //===----------------------------------------------------------------------===// @@ -214,7 +207,7 @@ const char *MipsAsmPrinter::getCurrentABIString() const { case MipsSubtarget::N32: return "abiN32"; case MipsSubtarget::N64: return "abi64"; case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64 - default: llvm_unreachable("Unknown Mips ABI");; + default: llvm_unreachable("Unknown Mips ABI"); } } @@ -246,8 +239,7 @@ void MipsAsmPrinter::EmitFunctionBodyStart() { OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder")); OutStreamer.EmitRawText(StringRef("\t.set\tnomacro")); - if (MipsFI->getEmitNOAT()) - OutStreamer.EmitRawText(StringRef("\t.set\tnoat")); + OutStreamer.EmitRawText(StringRef("\t.set\tnoat")); } } @@ -258,9 +250,7 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() { // always be at the function end, and we can't emit and // break with BB logic. if (OutStreamer.hasRawTextSupport()) { - if (MipsFI->getEmitNOAT()) - OutStreamer.EmitRawText(StringRef("\t.set\tat")); - + OutStreamer.EmitRawText(StringRef("\t.set\tat")); OutStreamer.EmitRawText(StringRef("\t.set\tmacro")); OutStreamer.EmitRawText(StringRef("\t.set\treorder")); OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName())); diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h index 562bf9c..94d8bfa 100644 --- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h +++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h @@ -32,6 +32,14 @@ class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter { void EmitInstrWithMacroNoAT(const MachineInstr *MI); +private: + // tblgen'erated function. + bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, + const MachineInstr *MI); + + // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. + bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); + public: const MipsSubtarget *Subtarget; diff --git a/contrib/llvm/lib/Target/Mips/MipsCallingConv.td b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td index 19213fa..78cf140 100644 --- a/contrib/llvm/lib/Target/Mips/MipsCallingConv.td +++ b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td @@ -35,9 +35,6 @@ def RetCC_MipsO32 : CallingConv<[ //===----------------------------------------------------------------------===// def CC_MipsN : CallingConv<[ - // Handles byval parameters. - CCIfByVal<CCCustom<"CC_Mips64Byval">>, - // Promote i8/i16 arguments to i32. CCIfType<[i8, i16], CCPromoteToType<i32>>, @@ -72,9 +69,6 @@ def CC_MipsN : CallingConv<[ // N32/64 variable arguments. // All arguments are passed in integer registers. def CC_MipsN_VarArg : CallingConv<[ - // Handles byval parameters. - CCIfByVal<CCCustom<"CC_Mips64Byval">>, - // Promote i8/i16 arguments to i32. 
CCIfType<[i8, i16], CCPromoteToType<i32>>, @@ -211,12 +205,6 @@ def CC_Mips_FastCC : CallingConv<[ // Mips Calling Convention Dispatch //===----------------------------------------------------------------------===// -def CC_Mips : CallingConv<[ - CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>, - CCIfSubtarget<"isABI_N32()", CCDelegateTo<CC_MipsN>>, - CCIfSubtarget<"isABI_N64()", CCDelegateTo<CC_MipsN>> -]>; - def RetCC_Mips : CallingConv<[ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>, CCIfSubtarget<"isABI_N32()", CCDelegateTo<RetCC_MipsN>>, diff --git a/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp index cb7022b..4bfccd8 100644 --- a/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp @@ -30,7 +30,6 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" -#include "llvm/Function.h" #include "llvm/PassManager.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -48,7 +47,7 @@ namespace { class MipsCodeEmitter : public MachineFunctionPass { MipsJITInfo *JTI; const MipsInstrInfo *II; - const TargetData *TD; + const DataLayout *TD; const MipsSubtarget *Subtarget; TargetMachine &TM; JITCodeEmitter &MCE; @@ -67,7 +66,7 @@ class MipsCodeEmitter : public MachineFunctionPass { MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) : MachineFunctionPass(ID), JTI(0), II((const MipsInstrInfo *) tm.getInstrInfo()), - TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), + TD(tm.getDataLayout()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), IsPIC(TM.getRelocationModel() == Reloc::PIC_) { } @@ -129,7 +128,7 @@ char MipsCodeEmitter::ID = 0; bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) { JTI = ((MipsTargetMachine&) MF.getTarget()).getJITInfo(); II = ((const MipsTargetMachine&) MF.getTarget()).getInstrInfo(); - TD = ((const MipsTargetMachine&) MF.getTarget()).getTargetData(); + TD = ((const MipsTargetMachine&) MF.getTarget()).getDataLayout(); Subtarget = &TM.getSubtarget<MipsSubtarget> (); MCPEs = &MF.getConstantPool()->getConstants(); MJTEs = 0; @@ -139,7 +138,7 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) { do { DEBUG(errs() << "JITTing function '" - << MF.getFunction()->getName() << "'\n"); + << MF.getName() << "'\n"); MCE.startFunction(MF); for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); @@ -219,15 +218,9 @@ unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI, return getMipsRegisterNumbering(MO.getReg()); else if (MO.isImm()) return static_cast<unsigned>(MO.getImm()); - else if (MO.isGlobal()) { - if (MI.getOpcode() == Mips::ULW || MI.getOpcode() == Mips::USW || - MI.getOpcode() == Mips::ULH || MI.getOpcode() == Mips::ULHu) - emitGlobalAddressUnaligned(MO.getGlobal(), getRelocation(MI, MO), 4); - else if (MI.getOpcode() == Mips::USH) - emitGlobalAddressUnaligned(MO.getGlobal(), getRelocation(MI, MO), 8); - else - emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true); - } else if (MO.isSymbol()) + else if (MO.isGlobal()) + emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true); + else if (MO.isSymbol()) emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO)); else if (MO.isCPI()) emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO)); @@ -384,29 +377,8 @@ void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) { if ((MI.getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo) return; - - switch 
(MI.getOpcode()) { - case Mips::USW: - NumEmitted += emitUSW(MI); - break; - case Mips::ULW: - NumEmitted += emitULW(MI); - break; - case Mips::ULH: - NumEmitted += emitULH(MI); - break; - case Mips::ULHu: - NumEmitted += emitULHu(MI); - break; - case Mips::USH: - NumEmitted += emitUSH(MI); - break; - - default: - emitWordLE(getBinaryCodeForInstr(MI)); - ++NumEmitted; // Keep track of the # of mi's emitted - break; - } + emitWordLE(getBinaryCodeForInstr(MI)); + ++NumEmitted; // Keep track of the # of mi's emitted MCE.processDebugLoc(MI.getDebugLoc(), false); } diff --git a/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td new file mode 100644 index 0000000..8e01d06 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td @@ -0,0 +1,309 @@ +//===- MipsDSPInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +def HasDSP : Predicate<"Subtarget.hasDSP()">, + AssemblerPredicate<"FeatureDSP">; +def HasDSPR2 : Predicate<"Subtarget.hasDSPR2()">, + AssemblerPredicate<"FeatureDSPR2">; + +// Fields. +class Field6<bits<6> val> { + bits<6> V = val; +} + +def SPECIAL3_OPCODE : Field6<0b011111>; +def REGIMM_OPCODE : Field6<0b000001>; + +class DSPInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> { + let Predicates = [HasDSP]; +} + +class PseudoDSP<dag outs, dag ins, list<dag> pattern>: + MipsPseudo<outs, ins, "", pattern> { + let Predicates = [HasDSP]; +} + +// ADDU.QB sub-class format. +class ADDU_QB_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<5> rs; + bits<5> rt; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b010000; +} + +class RADDU_W_QB_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<5> rs; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = 0; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b010000; +} + +// CMPU.EQ.QB sub-class format. +class CMP_EQ_QB_R2_FMT<bits<5> op> : DSPInst { + bits<5> rs; + bits<5> rt; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = 0; + let Inst{10-6} = op; + let Inst{5-0} = 0b010001; +} + +class CMP_EQ_QB_R3_FMT<bits<5> op> : DSPInst { + bits<5> rs; + bits<5> rt; + bits<5> rd; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b010001; +} + +class PRECR_SRA_PH_W_FMT<bits<5> op> : DSPInst { + bits<5> rs; + bits<5> rt; + bits<5> sa; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = sa; + let Inst{10-6} = op; + let Inst{5-0} = 0b010001; +} + +// ABSQ_S.PH sub-class format. +class ABSQ_S_PH_R2_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<5> rt; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = 0; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b010010; +} + + +class REPL_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<10> imm; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-16} = imm; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b010010; +} + +// SHLL.QB sub-class format. 
+class SHLL_QB_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<5> rt; + bits<5> rs_sa; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs_sa; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b010011; +} + +// LX sub-class format. +class LX_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<5> base; + bits<5> index; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = base; + let Inst{20-16} = index; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b001010; +} + +// ADDUH.QB sub-class format. +class ADDUH_QB_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<5> rs; + bits<5> rt; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b011000; +} + +// APPEND sub-class format. +class APPEND_FMT<bits<5> op> : DSPInst { + bits<5> rt; + bits<5> rs; + bits<5> sa; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = sa; + let Inst{10-6} = op; + let Inst{5-0} = 0b110001; +} + +// DPA.W.PH sub-class format. +class DPA_W_PH_FMT<bits<5> op> : DSPInst { + bits<2> ac; + bits<5> rs; + bits<5> rt; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-13} = 0; + let Inst{12-11} = ac; + let Inst{10-6} = op; + let Inst{5-0} = 0b110000; +} + +// MULT sub-class format. +class MULT_FMT<bits<6> opcode, bits<6> funct> : DSPInst { + bits<2> ac; + bits<5> rs; + bits<5> rt; + + let Opcode = opcode; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-13} = 0; + let Inst{12-11} = ac; + let Inst{10-6} = 0; + let Inst{5-0} = funct; +} + +// EXTR.W sub-class format (type 1). +class EXTR_W_TY1_FMT<bits<5> op> : DSPInst { + bits<5> rt; + bits<2> ac; + bits<5> shift_rs; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = shift_rs; + let Inst{20-16} = rt; + let Inst{15-13} = 0; + let Inst{12-11} = ac; + let Inst{10-6} = op; + let Inst{5-0} = 0b111000; +} + +// SHILO sub-class format. +class SHILO_R1_FMT<bits<5> op> : DSPInst { + bits<2> ac; + bits<6> shift; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-20} = shift; + let Inst{19-13} = 0; + let Inst{12-11} = ac; + let Inst{10-6} = op; + let Inst{5-0} = 0b111000; +} + +class SHILO_R2_FMT<bits<5> op> : DSPInst { + bits<2> ac; + bits<5> rs; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-13} = 0; + let Inst{12-11} = ac; + let Inst{10-6} = op; + let Inst{5-0} = 0b111000; +} + +class RDDSP_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<10> mask; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-16} = mask; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b111000; +} + +class WRDSP_FMT<bits<5> op> : DSPInst { + bits<5> rs; + bits<10> mask; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-11} = mask; + let Inst{10-6} = op; + let Inst{5-0} = 0b111000; +} + +class BPOSGE32_FMT<bits<5> op> : DSPInst { + bits<16> offset; + + let Opcode = REGIMM_OPCODE.V; + + let Inst{25-21} = 0; + let Inst{20-16} = op; + let Inst{15-0} = offset; +} + +// INSV sub-class format. 
+class INSV_FMT<bits<6> op> : DSPInst { + bits<5> rt; + bits<5> rs; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-6} = 0; + let Inst{5-0} = op; +} diff --git a/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td new file mode 100644 index 0000000..ef94028 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td @@ -0,0 +1,1319 @@ +//===- MipsDSPInstrInfo.td - DSP ASE instructions -*- tablegen ------------*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes Mips DSP ASE instructions. +// +//===----------------------------------------------------------------------===// + +// ImmLeaf +def immZExt2 : ImmLeaf<i32, [{return isUInt<2>(Imm);}]>; +def immZExt3 : ImmLeaf<i32, [{return isUInt<3>(Imm);}]>; +def immZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}]>; +def immZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}]>; +def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>; +def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>; + +// Mips-specific dsp nodes +def SDT_MipsExtr : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>; +def SDT_MipsShilo : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def SDT_MipsDPA : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>; + +class MipsDSPBase<string Opc, SDTypeProfile Prof> : + SDNode<!strconcat("MipsISD::", Opc), Prof, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; + +class MipsDSPSideEffectBase<string Opc, SDTypeProfile Prof> : + SDNode<!strconcat("MipsISD::", Opc), Prof, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPSideEffect]>; + +def MipsEXTP : MipsDSPSideEffectBase<"EXTP", SDT_MipsExtr>; +def MipsEXTPDP : MipsDSPSideEffectBase<"EXTPDP", SDT_MipsExtr>; +def MipsEXTR_S_H : MipsDSPSideEffectBase<"EXTR_S_H", SDT_MipsExtr>; +def MipsEXTR_W : MipsDSPSideEffectBase<"EXTR_W", SDT_MipsExtr>; +def MipsEXTR_R_W : MipsDSPSideEffectBase<"EXTR_R_W", SDT_MipsExtr>; +def MipsEXTR_RS_W : MipsDSPSideEffectBase<"EXTR_RS_W", SDT_MipsExtr>; + +def MipsSHILO : MipsDSPBase<"SHILO", SDT_MipsShilo>; +def MipsMTHLIP : MipsDSPBase<"MTHLIP", SDT_MipsShilo>; + +def MipsMULSAQ_S_W_PH : MipsDSPSideEffectBase<"MULSAQ_S_W_PH", SDT_MipsDPA>; +def MipsMAQ_S_W_PHL : MipsDSPSideEffectBase<"MAQ_S_W_PHL", SDT_MipsDPA>; +def MipsMAQ_S_W_PHR : MipsDSPSideEffectBase<"MAQ_S_W_PHR", SDT_MipsDPA>; +def MipsMAQ_SA_W_PHL : MipsDSPSideEffectBase<"MAQ_SA_W_PHL", SDT_MipsDPA>; +def MipsMAQ_SA_W_PHR : MipsDSPSideEffectBase<"MAQ_SA_W_PHR", SDT_MipsDPA>; + +def MipsDPAU_H_QBL : MipsDSPBase<"DPAU_H_QBL", SDT_MipsDPA>; +def MipsDPAU_H_QBR : MipsDSPBase<"DPAU_H_QBR", SDT_MipsDPA>; +def MipsDPSU_H_QBL : MipsDSPBase<"DPSU_H_QBL", SDT_MipsDPA>; +def MipsDPSU_H_QBR : MipsDSPBase<"DPSU_H_QBR", SDT_MipsDPA>; +def MipsDPAQ_S_W_PH : MipsDSPSideEffectBase<"DPAQ_S_W_PH", SDT_MipsDPA>; +def MipsDPSQ_S_W_PH : MipsDSPSideEffectBase<"DPSQ_S_W_PH", SDT_MipsDPA>; +def MipsDPAQ_SA_L_W : MipsDSPSideEffectBase<"DPAQ_SA_L_W", SDT_MipsDPA>; +def MipsDPSQ_SA_L_W : MipsDSPSideEffectBase<"DPSQ_SA_L_W", SDT_MipsDPA>; + +def MipsDPA_W_PH : MipsDSPBase<"DPA_W_PH", SDT_MipsDPA>; +def MipsDPS_W_PH : MipsDSPBase<"DPS_W_PH", SDT_MipsDPA>; +def MipsDPAQX_S_W_PH : MipsDSPSideEffectBase<"DPAQX_S_W_PH", SDT_MipsDPA>; +def MipsDPAQX_SA_W_PH : MipsDSPSideEffectBase<"DPAQX_SA_W_PH", 
SDT_MipsDPA>; +def MipsDPAX_W_PH : MipsDSPBase<"DPAX_W_PH", SDT_MipsDPA>; +def MipsDPSX_W_PH : MipsDSPBase<"DPSX_W_PH", SDT_MipsDPA>; +def MipsDPSQX_S_W_PH : MipsDSPSideEffectBase<"DPSQX_S_W_PH", SDT_MipsDPA>; +def MipsDPSQX_SA_W_PH : MipsDSPSideEffectBase<"DPSQX_SA_W_PH", SDT_MipsDPA>; +def MipsMULSA_W_PH : MipsDSPBase<"MULSA_W_PH", SDT_MipsDPA>; + +def MipsMULT : MipsDSPBase<"MULT", SDT_MipsDPA>; +def MipsMULTU : MipsDSPBase<"MULTU", SDT_MipsDPA>; +def MipsMADD_DSP : MipsDSPBase<"MADD_DSP", SDT_MipsDPA>; +def MipsMADDU_DSP : MipsDSPBase<"MADDU_DSP", SDT_MipsDPA>; +def MipsMSUB_DSP : MipsDSPBase<"MSUB_DSP", SDT_MipsDPA>; +def MipsMSUBU_DSP : MipsDSPBase<"MSUBU_DSP", SDT_MipsDPA>; + +// Flags. +class IsCommutable { + bit isCommutable = 1; +} + +class UseAC { + list<Register> Uses = [AC0]; +} + +class UseDSPCtrl { + list<Register> Uses = [DSPCtrl]; +} + +class ClearDefs { + list<Register> Defs = []; +} + +// Instruction encoding. +class ADDU_QB_ENC : ADDU_QB_FMT<0b00000>; +class ADDU_S_QB_ENC : ADDU_QB_FMT<0b00100>; +class SUBU_QB_ENC : ADDU_QB_FMT<0b00001>; +class SUBU_S_QB_ENC : ADDU_QB_FMT<0b00101>; +class ADDQ_PH_ENC : ADDU_QB_FMT<0b01010>; +class ADDQ_S_PH_ENC : ADDU_QB_FMT<0b01110>; +class SUBQ_PH_ENC : ADDU_QB_FMT<0b01011>; +class SUBQ_S_PH_ENC : ADDU_QB_FMT<0b01111>; +class ADDQ_S_W_ENC : ADDU_QB_FMT<0b10110>; +class SUBQ_S_W_ENC : ADDU_QB_FMT<0b10111>; +class ADDSC_ENC : ADDU_QB_FMT<0b10000>; +class ADDWC_ENC : ADDU_QB_FMT<0b10001>; +class MODSUB_ENC : ADDU_QB_FMT<0b10010>; +class RADDU_W_QB_ENC : RADDU_W_QB_FMT<0b10100>; +class ABSQ_S_PH_ENC : ABSQ_S_PH_R2_FMT<0b01001>; +class ABSQ_S_W_ENC : ABSQ_S_PH_R2_FMT<0b10001>; +class PRECRQ_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01100>; +class PRECRQ_PH_W_ENC : CMP_EQ_QB_R3_FMT<0b10100>; +class PRECRQ_RS_PH_W_ENC : CMP_EQ_QB_R3_FMT<0b10101>; +class PRECRQU_S_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01111>; +class PRECEQ_W_PHL_ENC : ABSQ_S_PH_R2_FMT<0b01100>; +class PRECEQ_W_PHR_ENC : ABSQ_S_PH_R2_FMT<0b01101>; +class PRECEQU_PH_QBL_ENC : ABSQ_S_PH_R2_FMT<0b00100>; +class PRECEQU_PH_QBR_ENC : ABSQ_S_PH_R2_FMT<0b00101>; +class PRECEQU_PH_QBLA_ENC : ABSQ_S_PH_R2_FMT<0b00110>; +class PRECEQU_PH_QBRA_ENC : ABSQ_S_PH_R2_FMT<0b00111>; +class PRECEU_PH_QBL_ENC : ABSQ_S_PH_R2_FMT<0b11100>; +class PRECEU_PH_QBR_ENC : ABSQ_S_PH_R2_FMT<0b11101>; +class PRECEU_PH_QBLA_ENC : ABSQ_S_PH_R2_FMT<0b11110>; +class PRECEU_PH_QBRA_ENC : ABSQ_S_PH_R2_FMT<0b11111>; +class SHLL_QB_ENC : SHLL_QB_FMT<0b00000>; +class SHLLV_QB_ENC : SHLL_QB_FMT<0b00010>; +class SHRL_QB_ENC : SHLL_QB_FMT<0b00001>; +class SHRLV_QB_ENC : SHLL_QB_FMT<0b00011>; +class SHLL_PH_ENC : SHLL_QB_FMT<0b01000>; +class SHLLV_PH_ENC : SHLL_QB_FMT<0b01010>; +class SHLL_S_PH_ENC : SHLL_QB_FMT<0b01100>; +class SHLLV_S_PH_ENC : SHLL_QB_FMT<0b01110>; +class SHRA_PH_ENC : SHLL_QB_FMT<0b01001>; +class SHRAV_PH_ENC : SHLL_QB_FMT<0b01011>; +class SHRA_R_PH_ENC : SHLL_QB_FMT<0b01101>; +class SHRAV_R_PH_ENC : SHLL_QB_FMT<0b01111>; +class SHLL_S_W_ENC : SHLL_QB_FMT<0b10100>; +class SHLLV_S_W_ENC : SHLL_QB_FMT<0b10110>; +class SHRA_R_W_ENC : SHLL_QB_FMT<0b10101>; +class SHRAV_R_W_ENC : SHLL_QB_FMT<0b10111>; +class MULEU_S_PH_QBL_ENC : ADDU_QB_FMT<0b00110>; +class MULEU_S_PH_QBR_ENC : ADDU_QB_FMT<0b00111>; +class MULEQ_S_W_PHL_ENC : ADDU_QB_FMT<0b11100>; +class MULEQ_S_W_PHR_ENC : ADDU_QB_FMT<0b11101>; +class MULQ_RS_PH_ENC : ADDU_QB_FMT<0b11111>; +class MULSAQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00110>; +class MAQ_S_W_PHL_ENC : DPA_W_PH_FMT<0b10100>; +class MAQ_S_W_PHR_ENC : DPA_W_PH_FMT<0b10110>; +class MAQ_SA_W_PHL_ENC : 
DPA_W_PH_FMT<0b10000>; +class MAQ_SA_W_PHR_ENC : DPA_W_PH_FMT<0b10010>; +class DPAU_H_QBL_ENC : DPA_W_PH_FMT<0b00011>; +class DPAU_H_QBR_ENC : DPA_W_PH_FMT<0b00111>; +class DPSU_H_QBL_ENC : DPA_W_PH_FMT<0b01011>; +class DPSU_H_QBR_ENC : DPA_W_PH_FMT<0b01111>; +class DPAQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00100>; +class DPSQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00101>; +class DPAQ_SA_L_W_ENC : DPA_W_PH_FMT<0b01100>; +class DPSQ_SA_L_W_ENC : DPA_W_PH_FMT<0b01101>; +class MULT_DSP_ENC : MULT_FMT<0b000000, 0b011000>; +class MULTU_DSP_ENC : MULT_FMT<0b000000, 0b011001>; +class MADD_DSP_ENC : MULT_FMT<0b011100, 0b000000>; +class MADDU_DSP_ENC : MULT_FMT<0b011100, 0b000001>; +class MSUB_DSP_ENC : MULT_FMT<0b011100, 0b000100>; +class MSUBU_DSP_ENC : MULT_FMT<0b011100, 0b000101>; +class CMPU_EQ_QB_ENC : CMP_EQ_QB_R2_FMT<0b00000>; +class CMPU_LT_QB_ENC : CMP_EQ_QB_R2_FMT<0b00001>; +class CMPU_LE_QB_ENC : CMP_EQ_QB_R2_FMT<0b00010>; +class CMPGU_EQ_QB_ENC : CMP_EQ_QB_R3_FMT<0b00100>; +class CMPGU_LT_QB_ENC : CMP_EQ_QB_R3_FMT<0b00101>; +class CMPGU_LE_QB_ENC : CMP_EQ_QB_R3_FMT<0b00110>; +class CMP_EQ_PH_ENC : CMP_EQ_QB_R2_FMT<0b01000>; +class CMP_LT_PH_ENC : CMP_EQ_QB_R2_FMT<0b01001>; +class CMP_LE_PH_ENC : CMP_EQ_QB_R2_FMT<0b01010>; +class BITREV_ENC : ABSQ_S_PH_R2_FMT<0b11011>; +class PACKRL_PH_ENC : CMP_EQ_QB_R3_FMT<0b01110>; +class REPL_QB_ENC : REPL_FMT<0b00010>; +class REPL_PH_ENC : REPL_FMT<0b01010>; +class REPLV_QB_ENC : ABSQ_S_PH_R2_FMT<0b00011>; +class REPLV_PH_ENC : ABSQ_S_PH_R2_FMT<0b01011>; +class PICK_QB_ENC : CMP_EQ_QB_R3_FMT<0b00011>; +class PICK_PH_ENC : CMP_EQ_QB_R3_FMT<0b01011>; +class LWX_ENC : LX_FMT<0b00000>; +class LHX_ENC : LX_FMT<0b00100>; +class LBUX_ENC : LX_FMT<0b00110>; +class BPOSGE32_ENC : BPOSGE32_FMT<0b11100>; +class INSV_ENC : INSV_FMT<0b001100>; + +class EXTP_ENC : EXTR_W_TY1_FMT<0b00010>; +class EXTPV_ENC : EXTR_W_TY1_FMT<0b00011>; +class EXTPDP_ENC : EXTR_W_TY1_FMT<0b01010>; +class EXTPDPV_ENC : EXTR_W_TY1_FMT<0b01011>; +class EXTR_W_ENC : EXTR_W_TY1_FMT<0b00000>; +class EXTRV_W_ENC : EXTR_W_TY1_FMT<0b00001>; +class EXTR_R_W_ENC : EXTR_W_TY1_FMT<0b00100>; +class EXTRV_R_W_ENC : EXTR_W_TY1_FMT<0b00101>; +class EXTR_RS_W_ENC : EXTR_W_TY1_FMT<0b00110>; +class EXTRV_RS_W_ENC : EXTR_W_TY1_FMT<0b00111>; +class EXTR_S_H_ENC : EXTR_W_TY1_FMT<0b01110>; +class EXTRV_S_H_ENC : EXTR_W_TY1_FMT<0b01111>; +class SHILO_ENC : SHILO_R1_FMT<0b11010>; +class SHILOV_ENC : SHILO_R2_FMT<0b11011>; +class MTHLIP_ENC : SHILO_R2_FMT<0b11111>; + +class RDDSP_ENC : RDDSP_FMT<0b10010>; +class WRDSP_ENC : WRDSP_FMT<0b10011>; +class ADDU_PH_ENC : ADDU_QB_FMT<0b01000>; +class ADDU_S_PH_ENC : ADDU_QB_FMT<0b01100>; +class SUBU_PH_ENC : ADDU_QB_FMT<0b01001>; +class SUBU_S_PH_ENC : ADDU_QB_FMT<0b01101>; +class CMPGDU_EQ_QB_ENC : CMP_EQ_QB_R3_FMT<0b11000>; +class CMPGDU_LT_QB_ENC : CMP_EQ_QB_R3_FMT<0b11001>; +class CMPGDU_LE_QB_ENC : CMP_EQ_QB_R3_FMT<0b11010>; +class ABSQ_S_QB_ENC : ABSQ_S_PH_R2_FMT<0b00001>; +class ADDUH_QB_ENC : ADDUH_QB_FMT<0b00000>; +class ADDUH_R_QB_ENC : ADDUH_QB_FMT<0b00010>; +class SUBUH_QB_ENC : ADDUH_QB_FMT<0b00001>; +class SUBUH_R_QB_ENC : ADDUH_QB_FMT<0b00011>; +class ADDQH_PH_ENC : ADDUH_QB_FMT<0b01000>; +class ADDQH_R_PH_ENC : ADDUH_QB_FMT<0b01010>; +class SUBQH_PH_ENC : ADDUH_QB_FMT<0b01001>; +class SUBQH_R_PH_ENC : ADDUH_QB_FMT<0b01011>; +class ADDQH_W_ENC : ADDUH_QB_FMT<0b10000>; +class ADDQH_R_W_ENC : ADDUH_QB_FMT<0b10010>; +class SUBQH_W_ENC : ADDUH_QB_FMT<0b10001>; +class SUBQH_R_W_ENC : ADDUH_QB_FMT<0b10011>; +class MUL_PH_ENC : ADDUH_QB_FMT<0b01100>; +class MUL_S_PH_ENC : 
ADDUH_QB_FMT<0b01110>; +class MULQ_S_W_ENC : ADDUH_QB_FMT<0b10110>; +class MULQ_RS_W_ENC : ADDUH_QB_FMT<0b10111>; +class MULQ_S_PH_ENC : ADDU_QB_FMT<0b11110>; +class DPA_W_PH_ENC : DPA_W_PH_FMT<0b00000>; +class DPS_W_PH_ENC : DPA_W_PH_FMT<0b00001>; +class DPAQX_S_W_PH_ENC : DPA_W_PH_FMT<0b11000>; +class DPAQX_SA_W_PH_ENC : DPA_W_PH_FMT<0b11010>; +class DPAX_W_PH_ENC : DPA_W_PH_FMT<0b01000>; +class DPSX_W_PH_ENC : DPA_W_PH_FMT<0b01001>; +class DPSQX_S_W_PH_ENC : DPA_W_PH_FMT<0b11001>; +class DPSQX_SA_W_PH_ENC : DPA_W_PH_FMT<0b11011>; +class MULSA_W_PH_ENC : DPA_W_PH_FMT<0b00010>; +class PRECR_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01101>; +class PRECR_SRA_PH_W_ENC : PRECR_SRA_PH_W_FMT<0b11110>; +class PRECR_SRA_R_PH_W_ENC : PRECR_SRA_PH_W_FMT<0b11111>; +class SHRA_QB_ENC : SHLL_QB_FMT<0b00100>; +class SHRAV_QB_ENC : SHLL_QB_FMT<0b00110>; +class SHRA_R_QB_ENC : SHLL_QB_FMT<0b00101>; +class SHRAV_R_QB_ENC : SHLL_QB_FMT<0b00111>; +class SHRL_PH_ENC : SHLL_QB_FMT<0b11001>; +class SHRLV_PH_ENC : SHLL_QB_FMT<0b11011>; +class APPEND_ENC : APPEND_FMT<0b00000>; +class BALIGN_ENC : APPEND_FMT<0b10000>; +class PREPEND_ENC : APPEND_FMT<0b00001>; + +// Instruction desc. +class ADDU_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCD, + RegisterClass RCS, RegisterClass RCT = RCS> { + dag OutOperandList = (outs RCD:$rd); + dag InOperandList = (ins RCS:$rs, RCT:$rt); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); + list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCD, + RegisterClass RCS = RCD> { + dag OutOperandList = (outs RCD:$rd); + dag InOperandList = (ins RCS:$rs); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs"); + list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class CMP_EQ_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCS, + RegisterClass RCT = RCS> { + dag OutOperandList = (outs); + dag InOperandList = (ins RCS:$rs, RCT:$rt); + string AsmString = !strconcat(instr_asm, "\t$rs, $rt"); + list<dag> Pattern = [(OpNode RCS:$rs, RCT:$rt)]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class CMP_EQ_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCD, + RegisterClass RCS, RegisterClass RCT = RCS> { + dag OutOperandList = (outs RCD:$rd); + dag InOperandList = (ins RCS:$rs, RCT:$rt); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); + list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCT, + RegisterClass RCS = RCT> { + dag OutOperandList = (outs RCT:$rt); + dag InOperandList = (ins RCS:$rs, shamt:$sa, RCS:$src); + string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa"); + list<dag> Pattern = [(set RCT:$rt, (OpNode RCS:$src, RCS:$rs, immZExt5:$sa))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; + string Constraints = "$src = $rt"; +} + +class ABSQ_S_PH_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCD, + RegisterClass RCT = RCD> { + dag OutOperandList = 
(outs RCD:$rd); + dag InOperandList = (ins RCT:$rt); + string AsmString = !strconcat(instr_asm, "\t$rd, $rt"); + list<dag> Pattern = [(set RCD:$rd, (OpNode RCT:$rt))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + ImmLeaf immPat, InstrItinClass itin, RegisterClass RC> { + dag OutOperandList = (outs RC:$rd); + dag InOperandList = (ins uimm16:$imm); + string AsmString = !strconcat(instr_asm, "\t$rd, $imm"); + list<dag> Pattern = [(set RC:$rd, (OpNode immPat:$imm))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RC> { + dag OutOperandList = (outs RC:$rd); + dag InOperandList = (ins RC:$rt, CPURegs:$rs_sa); + string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa"); + list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs_sa))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class SHLL_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + SDPatternOperator ImmPat, InstrItinClass itin, + RegisterClass RC> { + dag OutOperandList = (outs RC:$rd); + dag InOperandList = (ins RC:$rt, uimm16:$rs_sa); + string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa"); + list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, ImmPat:$rs_sa))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs CPURegs:$rd); + dag InOperandList = (ins CPURegs:$base, CPURegs:$index); + string AsmString = !strconcat(instr_asm, "\t$rd, ${index}(${base})"); + list<dag> Pattern = [(set CPURegs:$rd, + (OpNode CPURegs:$base, CPURegs:$index))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; + bit mayLoad = 1; +} + +class ADDUH_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCD, + RegisterClass RCS = RCD, RegisterClass RCT = RCD> { + dag OutOperandList = (outs RCD:$rd); + dag InOperandList = (ins RCS:$rs, RCT:$rt); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); + list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + SDPatternOperator ImmOp, InstrItinClass itin> { + dag OutOperandList = (outs CPURegs:$rt); + dag InOperandList = (ins CPURegs:$rs, shamt:$sa, CPURegs:$src); + string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa"); + list<dag> Pattern = [(set CPURegs:$rt, + (OpNode CPURegs:$src, CPURegs:$rs, ImmOp:$sa))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; + string Constraints = "$src = $rt"; +} + +class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs CPURegs:$rt); + dag InOperandList = (ins ACRegs:$ac, CPURegs:$shift_rs); + string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs CPURegs:$rt); + dag InOperandList = (ins ACRegs:$ac, uimm16:$shift_rs); + string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); + InstrItinClass Itinerary = itin; + 
list<Register> Defs = [DSPCtrl]; +} + +class SHILO_R1_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, + Instruction realinst> : + PseudoDSP<(outs), (ins simm16:$shift), [(OpNode immSExt6:$shift)]>, + PseudoInstExpansion<(realinst AC0, simm16:$shift)> { + list<Register> Defs = [DSPCtrl, AC0]; + list<Register> Uses = [AC0]; + InstrItinClass Itinerary = itin; +} + +class SHILO_R1_DESC_BASE<string instr_asm> { + dag OutOperandList = (outs ACRegs:$ac); + dag InOperandList = (ins simm16:$shift); + string AsmString = !strconcat(instr_asm, "\t$ac, $shift"); +} + +class SHILO_R2_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, + Instruction realinst> : + PseudoDSP<(outs), (ins CPURegs:$rs), [(OpNode CPURegs:$rs)]>, + PseudoInstExpansion<(realinst AC0, CPURegs:$rs)> { + list<Register> Defs = [DSPCtrl, AC0]; + list<Register> Uses = [AC0]; + InstrItinClass Itinerary = itin; +} + +class SHILO_R2_DESC_BASE<string instr_asm> { + dag OutOperandList = (outs ACRegs:$ac); + dag InOperandList = (ins CPURegs:$rs); + string AsmString = !strconcat(instr_asm, "\t$ac, $rs"); +} + +class MTHLIP_DESC_BASE<string instr_asm> { + dag OutOperandList = (outs ACRegs:$ac); + dag InOperandList = (ins CPURegs:$rs); + string AsmString = !strconcat(instr_asm, "\t$rs, $ac"); +} + +class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs CPURegs:$rd); + dag InOperandList = (ins uimm16:$mask); + string AsmString = !strconcat(instr_asm, "\t$rd, $mask"); + list<dag> Pattern = [(set CPURegs:$rd, (OpNode immZExt10:$mask))]; + InstrItinClass Itinerary = itin; + list<Register> Uses = [DSPCtrl]; +} + +class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs); + dag InOperandList = (ins CPURegs:$rs, uimm16:$mask); + string AsmString = !strconcat(instr_asm, "\t$rs, $mask"); + list<dag> Pattern = [(OpNode CPURegs:$rs, immZExt10:$mask)]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class DPA_W_PH_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, + Instruction realinst> : + PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt), + [(OpNode CPURegs:$rs, CPURegs:$rt)]>, + PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> { + list<Register> Defs = [DSPCtrl, AC0]; + list<Register> Uses = [AC0]; + InstrItinClass Itinerary = itin; +} + +class DPA_W_PH_DESC_BASE<string instr_asm> { + dag OutOperandList = (outs ACRegs:$ac); + dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt); + string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); +} + +class MULT_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, + Instruction realinst> : + PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt), + [(OpNode CPURegs:$rs, CPURegs:$rt)]>, + PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> { + list<Register> Defs = [DSPCtrl, AC0]; + InstrItinClass Itinerary = itin; +} + +class MULT_DESC_BASE<string instr_asm> { + dag OutOperandList = (outs ACRegs:$ac); + dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt); + string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); +} + +class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> : + MipsPseudo<(outs CPURegs:$dst), (ins), "", [(set CPURegs:$dst, (OpNode))]> { + list<Register> Uses = [DSPCtrl]; + bit usesCustomInserter = 1; +} + +class BPOSGE32_DESC_BASE<string instr_asm, InstrItinClass itin> { + dag OutOperandList = (outs); + dag InOperandList = (ins brtarget:$offset); + string AsmString 
= !strconcat(instr_asm, "\t$offset"); + InstrItinClass Itinerary = itin; + list<Register> Uses = [DSPCtrl]; + bit isBranch = 1; + bit isTerminator = 1; + bit hasDelaySlot = 1; +} + +class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs CPURegs:$rt); + dag InOperandList = (ins CPURegs:$src, CPURegs:$rs); + string AsmString = !strconcat(instr_asm, "\t$rt, $rs"); + list<dag> Pattern = [(set CPURegs:$rt, (OpNode CPURegs:$src, CPURegs:$rs))]; + InstrItinClass Itinerary = itin; + list<Register> Uses = [DSPCtrl]; + string Constraints = "$src = $rt"; +} + +//===----------------------------------------------------------------------===// +// MIPS DSP Rev 1 +//===----------------------------------------------------------------------===// + +// Addition/subtraction +class ADDU_QB_DESC : ADDU_QB_DESC_BASE<"addu.qb", int_mips_addu_qb, NoItinerary, + DSPRegs, DSPRegs>, IsCommutable; + +class ADDU_S_QB_DESC : ADDU_QB_DESC_BASE<"addu_s.qb", int_mips_addu_s_qb, + NoItinerary, DSPRegs, DSPRegs>, + IsCommutable; + +class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", int_mips_subu_qb, NoItinerary, + DSPRegs, DSPRegs>; + +class SUBU_S_QB_DESC : ADDU_QB_DESC_BASE<"subu_s.qb", int_mips_subu_s_qb, + NoItinerary, DSPRegs, DSPRegs>; + +class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", int_mips_addq_ph, NoItinerary, + DSPRegs, DSPRegs>, IsCommutable; + +class ADDQ_S_PH_DESC : ADDU_QB_DESC_BASE<"addq_s.ph", int_mips_addq_s_ph, + NoItinerary, DSPRegs, DSPRegs>, + IsCommutable; + +class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", int_mips_subq_ph, NoItinerary, + DSPRegs, DSPRegs>; + +class SUBQ_S_PH_DESC : ADDU_QB_DESC_BASE<"subq_s.ph", int_mips_subq_s_ph, + NoItinerary, DSPRegs, DSPRegs>; + +class ADDQ_S_W_DESC : ADDU_QB_DESC_BASE<"addq_s.w", int_mips_addq_s_w, + NoItinerary, CPURegs, CPURegs>, + IsCommutable; + +class SUBQ_S_W_DESC : ADDU_QB_DESC_BASE<"subq_s.w", int_mips_subq_s_w, + NoItinerary, CPURegs, CPURegs>; + +class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", int_mips_addsc, NoItinerary, + CPURegs, CPURegs>, IsCommutable; + +class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", int_mips_addwc, NoItinerary, + CPURegs, CPURegs>, + IsCommutable, UseDSPCtrl; + +class MODSUB_DESC : ADDU_QB_DESC_BASE<"modsub", int_mips_modsub, NoItinerary, + CPURegs, CPURegs>, ClearDefs; + +class RADDU_W_QB_DESC : RADDU_W_QB_DESC_BASE<"raddu.w.qb", int_mips_raddu_w_qb, + NoItinerary, CPURegs, DSPRegs>, + ClearDefs; + +// Absolute value +class ABSQ_S_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.ph", int_mips_absq_s_ph, + NoItinerary, DSPRegs>; + +class ABSQ_S_W_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.w", int_mips_absq_s_w, + NoItinerary, CPURegs>; + +// Precision reduce/expand +class PRECRQ_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.qb.ph", + int_mips_precrq_qb_ph, + NoItinerary, DSPRegs, DSPRegs>, + ClearDefs; + +class PRECRQ_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.ph.w", + int_mips_precrq_ph_w, + NoItinerary, DSPRegs, CPURegs>, + ClearDefs; + +class PRECRQ_RS_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq_rs.ph.w", + int_mips_precrq_rs_ph_w, + NoItinerary, DSPRegs, + CPURegs>; + +class PRECRQU_S_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrqu_s.qb.ph", + int_mips_precrqu_s_qb_ph, + NoItinerary, DSPRegs, + DSPRegs>; + +class PRECEQ_W_PHL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phl", + int_mips_preceq_w_phl, + NoItinerary, CPURegs, DSPRegs>, + ClearDefs; + +class PRECEQ_W_PHR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phr", + int_mips_preceq_w_phr, + NoItinerary, CPURegs, DSPRegs>, + ClearDefs; 
+ +class PRECEQU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbl", + int_mips_precequ_ph_qbl, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEQU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbr", + int_mips_precequ_ph_qbr, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEQU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbla", + int_mips_precequ_ph_qbla, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEQU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbra", + int_mips_precequ_ph_qbra, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbl", + int_mips_preceu_ph_qbl, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbr", + int_mips_preceu_ph_qbr, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbla", + int_mips_preceu_ph_qbla, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra", + int_mips_preceu_ph_qbra, + NoItinerary, DSPRegs>, + ClearDefs; + +// Shift +class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", int_mips_shll_qb, immZExt3, + NoItinerary, DSPRegs>; + +class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb, + NoItinerary, DSPRegs>; + +class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", int_mips_shrl_qb, immZExt3, + NoItinerary, DSPRegs>, ClearDefs; + +class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb, + NoItinerary, DSPRegs>, ClearDefs; + +class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", int_mips_shll_ph, immZExt4, + NoItinerary, DSPRegs>; + +class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph, + NoItinerary, DSPRegs>; + +class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph, + immZExt4, NoItinerary, DSPRegs>; + +class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph, + NoItinerary, DSPRegs>; + +class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", int_mips_shra_ph, immZExt4, + NoItinerary, DSPRegs>, ClearDefs; + +class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", int_mips_shra_ph, + NoItinerary, DSPRegs>, ClearDefs; + +class SHRA_R_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.ph", int_mips_shra_r_ph, + immZExt4, NoItinerary, DSPRegs>, + ClearDefs; + +class SHRAV_R_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.ph", int_mips_shra_r_ph, + NoItinerary, DSPRegs>, ClearDefs; + +class SHLL_S_W_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.w", int_mips_shll_s_w, + immZExt5, NoItinerary, CPURegs>; + +class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w, + NoItinerary, CPURegs>; + +class SHRA_R_W_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.w", int_mips_shra_r_w, + immZExt5, NoItinerary, CPURegs>, + ClearDefs; + +class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w, + NoItinerary, CPURegs>; + +// Multiplication +class MULEU_S_PH_QBL_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbl", + int_mips_muleu_s_ph_qbl, + NoItinerary, DSPRegs, DSPRegs>; + +class MULEU_S_PH_QBR_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbr", + int_mips_muleu_s_ph_qbr, + NoItinerary, DSPRegs, DSPRegs>; + +class MULEQ_S_W_PHL_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phl", + int_mips_muleq_s_w_phl, + NoItinerary, CPURegs, DSPRegs>, + IsCommutable; + +class MULEQ_S_W_PHR_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phr", + int_mips_muleq_s_w_phr, + NoItinerary, CPURegs, DSPRegs>, + IsCommutable; + +class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph, + NoItinerary, 
DSPRegs, DSPRegs>, + IsCommutable; + +class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph">; + +class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl">; + +class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr">; + +class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl">; + +class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr">; + +// Dot product with accumulate/subtract +class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl">; + +class DPAU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbr">; + +class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl">; + +class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr">; + +class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph">; + +class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph">; + +class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w">; + +class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w">; + +class MULT_DSP_DESC : MULT_DESC_BASE<"mult">; + +class MULTU_DSP_DESC : MULT_DESC_BASE<"multu">; + +class MADD_DSP_DESC : MULT_DESC_BASE<"madd">; + +class MADDU_DSP_DESC : MULT_DESC_BASE<"maddu">; + +class MSUB_DSP_DESC : MULT_DESC_BASE<"msub">; + +class MSUBU_DSP_DESC : MULT_DESC_BASE<"msubu">; + +// Comparison +class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb", + int_mips_cmpu_eq_qb, NoItinerary, + DSPRegs>, IsCommutable; + +class CMPU_LT_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.lt.qb", + int_mips_cmpu_lt_qb, NoItinerary, + DSPRegs>, IsCommutable; + +class CMPU_LE_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.le.qb", + int_mips_cmpu_le_qb, NoItinerary, + DSPRegs>, IsCommutable; + +class CMPGU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.eq.qb", + int_mips_cmpgu_eq_qb, + NoItinerary, CPURegs, DSPRegs>, + IsCommutable; + +class CMPGU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.lt.qb", + int_mips_cmpgu_lt_qb, + NoItinerary, CPURegs, DSPRegs>, + IsCommutable; + +class CMPGU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.le.qb", + int_mips_cmpgu_le_qb, + NoItinerary, CPURegs, DSPRegs>, + IsCommutable; + +class CMP_EQ_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.eq.ph", int_mips_cmp_eq_ph, + NoItinerary, DSPRegs>, + IsCommutable; + +class CMP_LT_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.lt.ph", int_mips_cmp_lt_ph, + NoItinerary, DSPRegs>, + IsCommutable; + +class CMP_LE_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.le.ph", int_mips_cmp_le_ph, + NoItinerary, DSPRegs>, + IsCommutable; + +// Misc +class BITREV_DESC : ABSQ_S_PH_R2_DESC_BASE<"bitrev", int_mips_bitrev, + NoItinerary, CPURegs>, ClearDefs; + +class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph, + NoItinerary, DSPRegs, DSPRegs>, + ClearDefs; + +class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, immZExt8, + NoItinerary, DSPRegs>, ClearDefs; + +class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, immZExt10, + NoItinerary, DSPRegs>, ClearDefs; + +class REPLV_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb, + NoItinerary, DSPRegs, CPURegs>, + ClearDefs; + +class REPLV_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.ph", int_mips_repl_ph, + NoItinerary, DSPRegs, CPURegs>, + ClearDefs; + +class PICK_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.qb", int_mips_pick_qb, + NoItinerary, DSPRegs, DSPRegs>, + ClearDefs, UseDSPCtrl; + +class PICK_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.ph", int_mips_pick_ph, + NoItinerary, DSPRegs, DSPRegs>, + ClearDefs, UseDSPCtrl; + +class LWX_DESC : LX_DESC_BASE<"lwx", int_mips_lwx, NoItinerary>, ClearDefs; + +class LHX_DESC : LX_DESC_BASE<"lhx", int_mips_lhx, NoItinerary>, ClearDefs; + +class LBUX_DESC : 
LX_DESC_BASE<"lbux", int_mips_lbux, NoItinerary>, ClearDefs; + +class BPOSGE32_DESC : BPOSGE32_DESC_BASE<"bposge32", NoItinerary>; + +// Extr +class EXTP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extp", MipsEXTP, NoItinerary>; + +class EXTPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpv", MipsEXTP, NoItinerary>; + +class EXTPDP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>; + +class EXTPDPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpdpv", MipsEXTPDP, + NoItinerary>; + +class EXTR_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>; + +class EXTRV_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv.w", MipsEXTR_W, + NoItinerary>; + +class EXTR_R_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_r.w", MipsEXTR_R_W, + NoItinerary>; + +class EXTRV_R_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_r.w", MipsEXTR_R_W, + NoItinerary>; + +class EXTR_RS_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_rs.w", MipsEXTR_RS_W, + NoItinerary>; + +class EXTRV_RS_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_rs.w", MipsEXTR_RS_W, + NoItinerary>; + +class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", MipsEXTR_S_H, + NoItinerary>; + +class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", MipsEXTR_S_H, + NoItinerary>; + +class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo">; + +class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov">; + +class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip">; + +class RDDSP_DESC : RDDSP_DESC_BASE<"rddsp", int_mips_rddsp, NoItinerary>; + +class WRDSP_DESC : WRDSP_DESC_BASE<"wrdsp", int_mips_wrdsp, NoItinerary>; + +class INSV_DESC : INSV_DESC_BASE<"insv", int_mips_insv, NoItinerary>; + +//===----------------------------------------------------------------------===// +// MIPS DSP Rev 2 +// Addition/subtraction +class ADDU_PH_DESC : ADDU_QB_DESC_BASE<"addu.ph", int_mips_addu_ph, NoItinerary, + DSPRegs, DSPRegs>, IsCommutable; + +class ADDU_S_PH_DESC : ADDU_QB_DESC_BASE<"addu_s.ph", int_mips_addu_s_ph, + NoItinerary, DSPRegs, DSPRegs>, + IsCommutable; + +class SUBU_PH_DESC : ADDU_QB_DESC_BASE<"subu.ph", int_mips_subu_ph, NoItinerary, + DSPRegs, DSPRegs>; + +class SUBU_S_PH_DESC : ADDU_QB_DESC_BASE<"subu_s.ph", int_mips_subu_s_ph, + NoItinerary, DSPRegs, DSPRegs>; + +class ADDUH_QB_DESC : ADDUH_QB_DESC_BASE<"adduh.qb", int_mips_adduh_qb, + NoItinerary, DSPRegs>, + ClearDefs, IsCommutable; + +class ADDUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"adduh_r.qb", int_mips_adduh_r_qb, + NoItinerary, DSPRegs>, + ClearDefs, IsCommutable; + +class SUBUH_QB_DESC : ADDUH_QB_DESC_BASE<"subuh.qb", int_mips_subuh_qb, + NoItinerary, DSPRegs>, ClearDefs; + +class SUBUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"subuh_r.qb", int_mips_subuh_r_qb, + NoItinerary, DSPRegs>, ClearDefs; + +class ADDQH_PH_DESC : ADDUH_QB_DESC_BASE<"addqh.ph", int_mips_addqh_ph, + NoItinerary, DSPRegs>, + ClearDefs, IsCommutable; + +class ADDQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"addqh_r.ph", int_mips_addqh_r_ph, + NoItinerary, DSPRegs>, + ClearDefs, IsCommutable; + +class SUBQH_PH_DESC : ADDUH_QB_DESC_BASE<"subqh.ph", int_mips_subqh_ph, + NoItinerary, DSPRegs>, ClearDefs; + +class SUBQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"subqh_r.ph", int_mips_subqh_r_ph, + NoItinerary, DSPRegs>, ClearDefs; + +class ADDQH_W_DESC : ADDUH_QB_DESC_BASE<"addqh.w", int_mips_addqh_w, + NoItinerary, CPURegs>, + ClearDefs, IsCommutable; + +class ADDQH_R_W_DESC : ADDUH_QB_DESC_BASE<"addqh_r.w", int_mips_addqh_r_w, + NoItinerary, CPURegs>, + ClearDefs, IsCommutable; + +class SUBQH_W_DESC : ADDUH_QB_DESC_BASE<"subqh.w", int_mips_subqh_w, + NoItinerary, CPURegs>, ClearDefs; + +class SUBQH_R_W_DESC : 
ADDUH_QB_DESC_BASE<"subqh_r.w", int_mips_subqh_r_w, + NoItinerary, CPURegs>, ClearDefs; + +// Comparison +class CMPGDU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.eq.qb", + int_mips_cmpgdu_eq_qb, + NoItinerary, CPURegs, DSPRegs>, + IsCommutable; + +class CMPGDU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.lt.qb", + int_mips_cmpgdu_lt_qb, + NoItinerary, CPURegs, DSPRegs>, + IsCommutable; + +class CMPGDU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.le.qb", + int_mips_cmpgdu_le_qb, + NoItinerary, CPURegs, DSPRegs>, + IsCommutable; + +// Absolute +class ABSQ_S_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.qb", int_mips_absq_s_qb, + NoItinerary, DSPRegs>; + +// Multiplication +class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", int_mips_mul_ph, NoItinerary, + DSPRegs>, IsCommutable; + +class MUL_S_PH_DESC : ADDUH_QB_DESC_BASE<"mul_s.ph", int_mips_mul_s_ph, + NoItinerary, DSPRegs>, IsCommutable; + +class MULQ_S_W_DESC : ADDUH_QB_DESC_BASE<"mulq_s.w", int_mips_mulq_s_w, + NoItinerary, CPURegs>, IsCommutable; + +class MULQ_RS_W_DESC : ADDUH_QB_DESC_BASE<"mulq_rs.w", int_mips_mulq_rs_w, + NoItinerary, CPURegs>, IsCommutable; + +class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph, + NoItinerary, DSPRegs, DSPRegs>, + IsCommutable; + +// Dot product with accumulate/subtract +class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph">; + +class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph">; + +class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph">; + +class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph">; + +class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph">; + +class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph">; + +class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph">; + +class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph">; + +class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph">; + +// Precision reduce/expand +class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph", + int_mips_precr_qb_ph, + NoItinerary, DSPRegs, DSPRegs>; + +class PRECR_SRA_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra.ph.w", + int_mips_precr_sra_ph_w, + NoItinerary, DSPRegs, + CPURegs>, ClearDefs; + +class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w", + int_mips_precr_sra_r_ph_w, + NoItinerary, DSPRegs, + CPURegs>, ClearDefs; + +// Shift +class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", int_mips_shra_qb, immZExt3, + NoItinerary, DSPRegs>, ClearDefs; + +class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb, + NoItinerary, DSPRegs>, ClearDefs; + +class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb, + immZExt3, NoItinerary, DSPRegs>, + ClearDefs; + +class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb, + NoItinerary, DSPRegs>, ClearDefs; + +class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", int_mips_shrl_ph, immZExt4, + NoItinerary, DSPRegs>, ClearDefs; + +class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph, + NoItinerary, DSPRegs>, ClearDefs; + +// Misc +class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, immZExt5, + NoItinerary>, ClearDefs; + +class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, immZExt2, + NoItinerary>, ClearDefs; + +class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, immZExt5, + NoItinerary>, ClearDefs; + +// Pseudos. +def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32, NoItinerary>; + +// Instruction defs. 
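The instruction defs that follow join each *_ENC encoding class to its *_DESC descriptor, and most descriptors select one of the int_mips_* intrinsics named above. As a rough source-level illustration of where those intrinsics originate — a sketch assuming GCC/Clang's MIPS DSP builtins and -mdsp; the typedef and function name follow GCC's DSP documentation and are not part of this patch:

// Illustrative only: the builtin that int_mips_addu_qb / ADDU_QB model.
// Build with a MIPS toolchain and -mdsp; this should select ADDU.QB.
typedef signed char v4i8 __attribute__((vector_size(4)));

v4i8 add_qb(v4i8 a, v4i8 b) {
  return __builtin_mips_addu_qb(a, b);
}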
+// MIPS DSP Rev 1 +def ADDU_QB : ADDU_QB_ENC, ADDU_QB_DESC; +def ADDU_S_QB : ADDU_S_QB_ENC, ADDU_S_QB_DESC; +def SUBU_QB : SUBU_QB_ENC, SUBU_QB_DESC; +def SUBU_S_QB : SUBU_S_QB_ENC, SUBU_S_QB_DESC; +def ADDQ_PH : ADDQ_PH_ENC, ADDQ_PH_DESC; +def ADDQ_S_PH : ADDQ_S_PH_ENC, ADDQ_S_PH_DESC; +def SUBQ_PH : SUBQ_PH_ENC, SUBQ_PH_DESC; +def SUBQ_S_PH : SUBQ_S_PH_ENC, SUBQ_S_PH_DESC; +def ADDQ_S_W : ADDQ_S_W_ENC, ADDQ_S_W_DESC; +def SUBQ_S_W : SUBQ_S_W_ENC, SUBQ_S_W_DESC; +def ADDSC : ADDSC_ENC, ADDSC_DESC; +def ADDWC : ADDWC_ENC, ADDWC_DESC; +def MODSUB : MODSUB_ENC, MODSUB_DESC; +def RADDU_W_QB : RADDU_W_QB_ENC, RADDU_W_QB_DESC; +def ABSQ_S_PH : ABSQ_S_PH_ENC, ABSQ_S_PH_DESC; +def ABSQ_S_W : ABSQ_S_W_ENC, ABSQ_S_W_DESC; +def PRECRQ_QB_PH : PRECRQ_QB_PH_ENC, PRECRQ_QB_PH_DESC; +def PRECRQ_PH_W : PRECRQ_PH_W_ENC, PRECRQ_PH_W_DESC; +def PRECRQ_RS_PH_W : PRECRQ_RS_PH_W_ENC, PRECRQ_RS_PH_W_DESC; +def PRECRQU_S_QB_PH : PRECRQU_S_QB_PH_ENC, PRECRQU_S_QB_PH_DESC; +def PRECEQ_W_PHL : PRECEQ_W_PHL_ENC, PRECEQ_W_PHL_DESC; +def PRECEQ_W_PHR : PRECEQ_W_PHR_ENC, PRECEQ_W_PHR_DESC; +def PRECEQU_PH_QBL : PRECEQU_PH_QBL_ENC, PRECEQU_PH_QBL_DESC; +def PRECEQU_PH_QBR : PRECEQU_PH_QBR_ENC, PRECEQU_PH_QBR_DESC; +def PRECEQU_PH_QBLA : PRECEQU_PH_QBLA_ENC, PRECEQU_PH_QBLA_DESC; +def PRECEQU_PH_QBRA : PRECEQU_PH_QBRA_ENC, PRECEQU_PH_QBRA_DESC; +def PRECEU_PH_QBL : PRECEU_PH_QBL_ENC, PRECEU_PH_QBL_DESC; +def PRECEU_PH_QBR : PRECEU_PH_QBR_ENC, PRECEU_PH_QBR_DESC; +def PRECEU_PH_QBLA : PRECEU_PH_QBLA_ENC, PRECEU_PH_QBLA_DESC; +def PRECEU_PH_QBRA : PRECEU_PH_QBRA_ENC, PRECEU_PH_QBRA_DESC; +def SHLL_QB : SHLL_QB_ENC, SHLL_QB_DESC; +def SHLLV_QB : SHLLV_QB_ENC, SHLLV_QB_DESC; +def SHRL_QB : SHRL_QB_ENC, SHRL_QB_DESC; +def SHRLV_QB : SHRLV_QB_ENC, SHRLV_QB_DESC; +def SHLL_PH : SHLL_PH_ENC, SHLL_PH_DESC; +def SHLLV_PH : SHLLV_PH_ENC, SHLLV_PH_DESC; +def SHLL_S_PH : SHLL_S_PH_ENC, SHLL_S_PH_DESC; +def SHLLV_S_PH : SHLLV_S_PH_ENC, SHLLV_S_PH_DESC; +def SHRA_PH : SHRA_PH_ENC, SHRA_PH_DESC; +def SHRAV_PH : SHRAV_PH_ENC, SHRAV_PH_DESC; +def SHRA_R_PH : SHRA_R_PH_ENC, SHRA_R_PH_DESC; +def SHRAV_R_PH : SHRAV_R_PH_ENC, SHRAV_R_PH_DESC; +def SHLL_S_W : SHLL_S_W_ENC, SHLL_S_W_DESC; +def SHLLV_S_W : SHLLV_S_W_ENC, SHLLV_S_W_DESC; +def SHRA_R_W : SHRA_R_W_ENC, SHRA_R_W_DESC; +def SHRAV_R_W : SHRAV_R_W_ENC, SHRAV_R_W_DESC; +def MULEU_S_PH_QBL : MULEU_S_PH_QBL_ENC, MULEU_S_PH_QBL_DESC; +def MULEU_S_PH_QBR : MULEU_S_PH_QBR_ENC, MULEU_S_PH_QBR_DESC; +def MULEQ_S_W_PHL : MULEQ_S_W_PHL_ENC, MULEQ_S_W_PHL_DESC; +def MULEQ_S_W_PHR : MULEQ_S_W_PHR_ENC, MULEQ_S_W_PHR_DESC; +def MULQ_RS_PH : MULQ_RS_PH_ENC, MULQ_RS_PH_DESC; +def MULSAQ_S_W_PH : MULSAQ_S_W_PH_ENC, MULSAQ_S_W_PH_DESC; +def MAQ_S_W_PHL : MAQ_S_W_PHL_ENC, MAQ_S_W_PHL_DESC; +def MAQ_S_W_PHR : MAQ_S_W_PHR_ENC, MAQ_S_W_PHR_DESC; +def MAQ_SA_W_PHL : MAQ_SA_W_PHL_ENC, MAQ_SA_W_PHL_DESC; +def MAQ_SA_W_PHR : MAQ_SA_W_PHR_ENC, MAQ_SA_W_PHR_DESC; +def DPAU_H_QBL : DPAU_H_QBL_ENC, DPAU_H_QBL_DESC; +def DPAU_H_QBR : DPAU_H_QBR_ENC, DPAU_H_QBR_DESC; +def DPSU_H_QBL : DPSU_H_QBL_ENC, DPSU_H_QBL_DESC; +def DPSU_H_QBR : DPSU_H_QBR_ENC, DPSU_H_QBR_DESC; +def DPAQ_S_W_PH : DPAQ_S_W_PH_ENC, DPAQ_S_W_PH_DESC; +def DPSQ_S_W_PH : DPSQ_S_W_PH_ENC, DPSQ_S_W_PH_DESC; +def DPAQ_SA_L_W : DPAQ_SA_L_W_ENC, DPAQ_SA_L_W_DESC; +def DPSQ_SA_L_W : DPSQ_SA_L_W_ENC, DPSQ_SA_L_W_DESC; +def MULT_DSP : MULT_DSP_ENC, MULT_DSP_DESC; +def MULTU_DSP : MULTU_DSP_ENC, MULTU_DSP_DESC; +def MADD_DSP : MADD_DSP_ENC, MADD_DSP_DESC; +def MADDU_DSP : MADDU_DSP_ENC, MADDU_DSP_DESC; +def MSUB_DSP : MSUB_DSP_ENC, MSUB_DSP_DESC; +def 
MSUBU_DSP : MSUBU_DSP_ENC, MSUBU_DSP_DESC; +def CMPU_EQ_QB : CMPU_EQ_QB_ENC, CMPU_EQ_QB_DESC; +def CMPU_LT_QB : CMPU_LT_QB_ENC, CMPU_LT_QB_DESC; +def CMPU_LE_QB : CMPU_LE_QB_ENC, CMPU_LE_QB_DESC; +def CMPGU_EQ_QB : CMPGU_EQ_QB_ENC, CMPGU_EQ_QB_DESC; +def CMPGU_LT_QB : CMPGU_LT_QB_ENC, CMPGU_LT_QB_DESC; +def CMPGU_LE_QB : CMPGU_LE_QB_ENC, CMPGU_LE_QB_DESC; +def CMP_EQ_PH : CMP_EQ_PH_ENC, CMP_EQ_PH_DESC; +def CMP_LT_PH : CMP_LT_PH_ENC, CMP_LT_PH_DESC; +def CMP_LE_PH : CMP_LE_PH_ENC, CMP_LE_PH_DESC; +def BITREV : BITREV_ENC, BITREV_DESC; +def PACKRL_PH : PACKRL_PH_ENC, PACKRL_PH_DESC; +def REPL_QB : REPL_QB_ENC, REPL_QB_DESC; +def REPL_PH : REPL_PH_ENC, REPL_PH_DESC; +def REPLV_QB : REPLV_QB_ENC, REPLV_QB_DESC; +def REPLV_PH : REPLV_PH_ENC, REPLV_PH_DESC; +def PICK_QB : PICK_QB_ENC, PICK_QB_DESC; +def PICK_PH : PICK_PH_ENC, PICK_PH_DESC; +def LWX : LWX_ENC, LWX_DESC; +def LHX : LHX_ENC, LHX_DESC; +def LBUX : LBUX_ENC, LBUX_DESC; +def BPOSGE32 : BPOSGE32_ENC, BPOSGE32_DESC; +def INSV : INSV_ENC, INSV_DESC; +def EXTP : EXTP_ENC, EXTP_DESC; +def EXTPV : EXTPV_ENC, EXTPV_DESC; +def EXTPDP : EXTPDP_ENC, EXTPDP_DESC; +def EXTPDPV : EXTPDPV_ENC, EXTPDPV_DESC; +def EXTR_W : EXTR_W_ENC, EXTR_W_DESC; +def EXTRV_W : EXTRV_W_ENC, EXTRV_W_DESC; +def EXTR_R_W : EXTR_R_W_ENC, EXTR_R_W_DESC; +def EXTRV_R_W : EXTRV_R_W_ENC, EXTRV_R_W_DESC; +def EXTR_RS_W : EXTR_RS_W_ENC, EXTR_RS_W_DESC; +def EXTRV_RS_W : EXTRV_RS_W_ENC, EXTRV_RS_W_DESC; +def EXTR_S_H : EXTR_S_H_ENC, EXTR_S_H_DESC; +def EXTRV_S_H : EXTRV_S_H_ENC, EXTRV_S_H_DESC; +def SHILO : SHILO_ENC, SHILO_DESC; +def SHILOV : SHILOV_ENC, SHILOV_DESC; +def MTHLIP : MTHLIP_ENC, MTHLIP_DESC; +def RDDSP : RDDSP_ENC, RDDSP_DESC; +def WRDSP : WRDSP_ENC, WRDSP_DESC; + +// MIPS DSP Rev 2 +let Predicates = [HasDSPR2] in { + +def ADDU_PH : ADDU_PH_ENC, ADDU_PH_DESC; +def ADDU_S_PH : ADDU_S_PH_ENC, ADDU_S_PH_DESC; +def SUBU_PH : SUBU_PH_ENC, SUBU_PH_DESC; +def SUBU_S_PH : SUBU_S_PH_ENC, SUBU_S_PH_DESC; +def CMPGDU_EQ_QB : CMPGDU_EQ_QB_ENC, CMPGDU_EQ_QB_DESC; +def CMPGDU_LT_QB : CMPGDU_LT_QB_ENC, CMPGDU_LT_QB_DESC; +def CMPGDU_LE_QB : CMPGDU_LE_QB_ENC, CMPGDU_LE_QB_DESC; +def ABSQ_S_QB : ABSQ_S_QB_ENC, ABSQ_S_QB_DESC; +def ADDUH_QB : ADDUH_QB_ENC, ADDUH_QB_DESC; +def ADDUH_R_QB : ADDUH_R_QB_ENC, ADDUH_R_QB_DESC; +def SUBUH_QB : SUBUH_QB_ENC, SUBUH_QB_DESC; +def SUBUH_R_QB : SUBUH_R_QB_ENC, SUBUH_R_QB_DESC; +def ADDQH_PH : ADDQH_PH_ENC, ADDQH_PH_DESC; +def ADDQH_R_PH : ADDQH_R_PH_ENC, ADDQH_R_PH_DESC; +def SUBQH_PH : SUBQH_PH_ENC, SUBQH_PH_DESC; +def SUBQH_R_PH : SUBQH_R_PH_ENC, SUBQH_R_PH_DESC; +def ADDQH_W : ADDQH_W_ENC, ADDQH_W_DESC; +def ADDQH_R_W : ADDQH_R_W_ENC, ADDQH_R_W_DESC; +def SUBQH_W : SUBQH_W_ENC, SUBQH_W_DESC; +def SUBQH_R_W : SUBQH_R_W_ENC, SUBQH_R_W_DESC; +def MUL_PH : MUL_PH_ENC, MUL_PH_DESC; +def MUL_S_PH : MUL_S_PH_ENC, MUL_S_PH_DESC; +def MULQ_S_W : MULQ_S_W_ENC, MULQ_S_W_DESC; +def MULQ_RS_W : MULQ_RS_W_ENC, MULQ_RS_W_DESC; +def MULQ_S_PH : MULQ_S_PH_ENC, MULQ_S_PH_DESC; +def DPA_W_PH : DPA_W_PH_ENC, DPA_W_PH_DESC; +def DPS_W_PH : DPS_W_PH_ENC, DPS_W_PH_DESC; +def DPAQX_S_W_PH : DPAQX_S_W_PH_ENC, DPAQX_S_W_PH_DESC; +def DPAQX_SA_W_PH : DPAQX_SA_W_PH_ENC, DPAQX_SA_W_PH_DESC; +def DPAX_W_PH : DPAX_W_PH_ENC, DPAX_W_PH_DESC; +def DPSX_W_PH : DPSX_W_PH_ENC, DPSX_W_PH_DESC; +def DPSQX_S_W_PH : DPSQX_S_W_PH_ENC, DPSQX_S_W_PH_DESC; +def DPSQX_SA_W_PH : DPSQX_SA_W_PH_ENC, DPSQX_SA_W_PH_DESC; +def MULSA_W_PH : MULSA_W_PH_ENC, MULSA_W_PH_DESC; +def PRECR_QB_PH : PRECR_QB_PH_ENC, PRECR_QB_PH_DESC; +def PRECR_SRA_PH_W : PRECR_SRA_PH_W_ENC, PRECR_SRA_PH_W_DESC; 
+def PRECR_SRA_R_PH_W : PRECR_SRA_R_PH_W_ENC, PRECR_SRA_R_PH_W_DESC; +def SHRA_QB : SHRA_QB_ENC, SHRA_QB_DESC; +def SHRAV_QB : SHRAV_QB_ENC, SHRAV_QB_DESC; +def SHRA_R_QB : SHRA_R_QB_ENC, SHRA_R_QB_DESC; +def SHRAV_R_QB : SHRAV_R_QB_ENC, SHRAV_R_QB_DESC; +def SHRL_PH : SHRL_PH_ENC, SHRL_PH_DESC; +def SHRLV_PH : SHRLV_PH_ENC, SHRLV_PH_DESC; +def APPEND : APPEND_ENC, APPEND_DESC; +def BALIGN : BALIGN_ENC, BALIGN_DESC; +def PREPEND : PREPEND_ENC, PREPEND_DESC; + +} + +// Pseudos. +def MULSAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSAQ_S_W_PH, NoItinerary, + MULSAQ_S_W_PH>; +def MAQ_S_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHL, NoItinerary, + MAQ_S_W_PHL>; +def MAQ_S_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHR, NoItinerary, + MAQ_S_W_PHR>; +def MAQ_SA_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHL, NoItinerary, + MAQ_SA_W_PHL>; +def MAQ_SA_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHR, NoItinerary, + MAQ_SA_W_PHR>; +def DPAU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBL, NoItinerary, + DPAU_H_QBL>; +def DPAU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBR, NoItinerary, + DPAU_H_QBR>; +def DPSU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBL, NoItinerary, + DPSU_H_QBL>; +def DPSU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBR, NoItinerary, + DPSU_H_QBR>; +def DPAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_S_W_PH, NoItinerary, + DPAQ_S_W_PH>; +def DPSQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_S_W_PH, NoItinerary, + DPSQ_S_W_PH>; +def DPAQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_SA_L_W, NoItinerary, + DPAQ_SA_L_W>; +def DPSQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_SA_L_W, NoItinerary, + DPSQ_SA_L_W>; + +def MULT_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULT, NoItinerary, MULT_DSP>, + IsCommutable; +def MULTU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULTU, NoItinerary, MULTU_DSP>, + IsCommutable; +def MADD_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADD_DSP, NoItinerary, MADD_DSP>, + IsCommutable, UseAC; +def MADDU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADDU_DSP, NoItinerary, MADDU_DSP>, + IsCommutable, UseAC; +def MSUB_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUB_DSP, NoItinerary, MSUB_DSP>, + UseAC; +def MSUBU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUBU_DSP, NoItinerary, MSUBU_DSP>, + UseAC; + +def SHILO_PSEUDO : SHILO_R1_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILO>; +def SHILOV_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILOV>; +def MTHLIP_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsMTHLIP, NoItinerary, MTHLIP>; + +let Predicates = [HasDSPR2] in { + +def DPA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPA_W_PH, NoItinerary, DPA_W_PH>; +def DPS_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPS_W_PH, NoItinerary, DPS_W_PH>; +def DPAQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_S_W_PH, NoItinerary, + DPAQX_S_W_PH>; +def DPAQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_SA_W_PH, NoItinerary, + DPAQX_SA_W_PH>; +def DPAX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAX_W_PH, NoItinerary, + DPAX_W_PH>; +def DPSX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSX_W_PH, NoItinerary, + DPSX_W_PH>; +def DPSQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_S_W_PH, NoItinerary, + DPSQX_S_W_PH>; +def DPSQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_SA_W_PH, NoItinerary, + DPSQX_SA_W_PH>; +def MULSA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSA_W_PH, NoItinerary, + MULSA_W_PH>; + +} + +// Patterns. 
+class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> :
+  Pat<pattern, result>, Requires<[pred]>;
+
+class BitconvertPat<ValueType DstVT, ValueType SrcVT, RegisterClass DstRC,
+                    RegisterClass SrcRC> :
+    DSPPat<(DstVT (bitconvert (SrcVT SrcRC:$src))),
+           (COPY_TO_REGCLASS SrcRC:$src, DstRC)>;
+
+def : BitconvertPat<i32, v2i16, CPURegs, DSPRegs>;
+def : BitconvertPat<i32, v4i8, CPURegs, DSPRegs>;
+def : BitconvertPat<v2i16, i32, DSPRegs, CPURegs>;
+def : BitconvertPat<v4i8, i32, DSPRegs, CPURegs>;
+
+def : DSPPat<(v2i16 (load addr:$a)),
+             (v2i16 (COPY_TO_REGCLASS (LW addr:$a), DSPRegs))>;
+def : DSPPat<(v4i8 (load addr:$a)),
+             (v4i8 (COPY_TO_REGCLASS (LW addr:$a), DSPRegs))>;
+def : DSPPat<(store (v2i16 DSPRegs:$val), addr:$a),
+             (SW (COPY_TO_REGCLASS DSPRegs:$val, CPURegs), addr:$a)>;
+def : DSPPat<(store (v4i8 DSPRegs:$val), addr:$a),
+             (SW (COPY_TO_REGCLASS DSPRegs:$val, CPURegs), addr:$a)>;
+
+// Extr patterns.
+class EXTR_W_TY1_R2_Pat<SDPatternOperator OpNode, Instruction Instr> :
+  DSPPat<(i32 (OpNode CPURegs:$rs)), (Instr AC0, CPURegs:$rs)>;
+
+class EXTR_W_TY1_R1_Pat<SDPatternOperator OpNode, Instruction Instr> :
+  DSPPat<(i32 (OpNode immZExt5:$shift)), (Instr AC0, immZExt5:$shift)>;
+
+def : EXTR_W_TY1_R1_Pat<MipsEXTP, EXTP>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTP, EXTPV>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTPDP, EXTPDP>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTPDP, EXTPDPV>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTR_W, EXTR_W>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTR_W, EXTRV_W>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTR_R_W, EXTR_R_W>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTR_R_W, EXTRV_R_W>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTR_RS_W, EXTR_RS_W>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTR_RS_W, EXTRV_RS_W>;
+def : EXTR_W_TY1_R1_Pat<MipsEXTR_S_H, EXTR_S_H>;
+def : EXTR_W_TY1_R2_Pat<MipsEXTR_S_H, EXTRV_S_H>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 2bba8a3..e3c8ed7 100644
--- a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -30,10 +30,11 @@ STATISTIC(FilledSlots, "Number of delay slots filled");
 STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that"
                        " are not NOP.");
 
-static cl::opt<bool> EnableDelaySlotFiller(
-  "enable-mips-delay-filler",
+static cl::opt<bool> DisableDelaySlotFiller(
+  "disable-mips-delay-filler",
   cl::init(false),
-  cl::desc("Fill the Mips delay slots useful instructions."),
+  cl::desc("Disable the delay slot filler, which attempts to fill the Mips"
+           " delay slots with useful instructions."),
   cl::Hidden);
 
 // This option can be used to silence complaints by machine verifier passes.
@@ -114,7 +115,9 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) {
 
       InstrIter D;
 
-      if (EnableDelaySlotFiller && findDelayInstr(MBB, I, D)) {
+      // Delay slot filling is disabled at -O0.
+ if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None) && + findDelayInstr(MBB, I, D)) { MBB.splice(llvm::next(I), &MBB, D); ++UsefulSlots; } else diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp index 8c0474b..2cad2a6 100644 --- a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp @@ -23,7 +23,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" @@ -98,3 +98,37 @@ bool MipsFrameLowering::hasFP(const MachineFunction &MF) const { return MF.getTarget().Options.DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); } + +uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo(); + + int64_t Offset = 0; + + // Iterate over fixed sized objects. + for (int I = MFI->getObjectIndexBegin(); I != 0; ++I) + Offset = std::max(Offset, -MFI->getObjectOffset(I)); + + // Conservatively assume all callee-saved registers will be saved. + for (const uint16_t *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) { + unsigned Size = TRI.getMinimalPhysRegClass(*R)->getSize(); + Offset = RoundUpToAlignment(Offset + Size, Size); + } + + unsigned MaxAlign = MFI->getMaxAlignment(); + + // Check that MaxAlign is not zero if there is a stack object that is not a + // callee-saved spill. + assert(!MFI->getObjectIndexEnd() || MaxAlign); + + // Iterate over other objects. + for (unsigned I = 0, E = MFI->getObjectIndexEnd(); I != E; ++I) + Offset = RoundUpToAlignment(Offset + MFI->getObjectSize(I), MaxAlign); + + // Call frame. + if (MFI->adjustsStack() && hasReservedCallFrame(MF)) + Offset = RoundUpToAlignment(Offset + MFI->getMaxCallFrameSize(), + std::max(MaxAlign, getStackAlignment())); + + return RoundUpToAlignment(Offset, getStackAlignment()); +} diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h index ed7b7fe..df52d92 100644 --- a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h +++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h @@ -34,6 +34,9 @@ public: const MipsSubtarget &ST); bool hasFP(const MachineFunction &MF) const; + +protected: + uint64_t estimateStackSize(const MachineFunction &MF) const; }; /// Create MipsInstrInfo objects. diff --git a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp index 5a97c17..c5fca7f 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -86,6 +86,10 @@ private: SDNode *getGlobalBaseReg(); + SDValue getMips16SPAliasReg(); + + void getMips16SPRefReg(SDNode *parent, SDValue &AliasReg); + std::pair<SDNode*, SDNode*> SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, EVT Ty, bool HasLo, bool HasHi); @@ -94,6 +98,9 @@ private: // Complex Pattern. bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset); + bool SelectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset, + SDValue &Alias); + // getImm - Return a target constant with the specified value. 
inline SDValue getImm(const SDNode *Node, unsigned Imm) { return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); @@ -102,6 +109,7 @@ private: void ProcessFunctionAfterISel(MachineFunction &MF); bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&); void InitGlobalBaseReg(MachineFunction &MF); + void InitMips16SPAliasReg(MachineFunction &MF); virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, @@ -220,6 +228,26 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { .addReg(Mips::V0).addReg(Mips::T9); } +// Insert instructions to initialize the Mips16 SP Alias register in the +// first MBB of the function. +// +void MipsDAGToDAGISel::InitMips16SPAliasReg(MachineFunction &MF) { + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + if (!MipsFI->mips16SPAliasRegSet()) + return; + + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator I = MBB.begin(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg(); + + BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg) + .addReg(Mips::SP); +} + + bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr& MI) { unsigned DstReg = 0, ZeroReg = 0; @@ -260,6 +288,7 @@ bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) { InitGlobalBaseReg(MF); + InitMips16SPAliasReg(MF); MachineRegisterInfo *MRI = &MF.getRegInfo(); @@ -284,6 +313,14 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } +/// getMips16SPAliasReg - Output the instructions required to put the +/// SP into a Mips16 accessible aliased register. +SDValue MipsDAGToDAGISel::getMips16SPAliasReg() { + unsigned Mips16SPAliasReg = + MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg(); + return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy()); +} + /// ComplexPattern used on MipsInstrInfo /// Used on Mips Load/Store instructions bool MipsDAGToDAGISel:: @@ -337,8 +374,9 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { // Generate: // lui $2, %hi($CPI1_0) // lwc1 $f0, %lo($CPI1_0)($2) - if (Addr.getOperand(1).getOpcode() == MipsISD::Lo) { - SDValue LoVal = Addr.getOperand(1), Opnd0 = LoVal.getOperand(0); + if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || + Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { + SDValue Opnd0 = Addr.getOperand(1).getOperand(0); if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || isa<JumpTableSDNode>(Opnd0)) { Base = Addr.getOperand(0); @@ -361,6 +399,115 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { return true; } +void MipsDAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { + SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy()); + if (Parent) { + switch (Parent->getOpcode()) { + case ISD::LOAD: { + LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent); + switch (SD->getMemoryVT().getSizeInBits()) { + case 8: + case 16: + AliasReg = TM.getFrameLowering()->hasFP(*MF)? + AliasFPReg: getMips16SPAliasReg(); + return; + } + break; + } + case ISD::STORE: { + StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent); + switch (SD->getMemoryVT().getSizeInBits()) { + case 8: + case 16: + AliasReg = TM.getFrameLowering()->hasFP(*MF)? 
+          AliasFPReg: getMips16SPAliasReg();
+        return;
+      }
+      break;
+    }
+    }
+  }
+  AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy());
+  return;
+}
+
+bool MipsDAGToDAGISel::SelectAddr16(
+  SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset,
+  SDValue &Alias) {
+  EVT ValTy = Addr.getValueType();
+
+  Alias = CurDAG->getTargetConstant(0, ValTy);
+
+  // If the address is a frame index, get the TargetFrameIndex.
+  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+    Offset = CurDAG->getTargetConstant(0, ValTy);
+    getMips16SPRefReg(Parent, Alias);
+    return true;
+  }
+  // In PIC mode, load the global address (GA) through the wrapper node.
+  if (Addr.getOpcode() == MipsISD::Wrapper) {
+    Base = Addr.getOperand(0);
+    Offset = Addr.getOperand(1);
+    return true;
+  }
+  if (TM.getRelocationModel() != Reloc::PIC_) {
+    if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+        Addr.getOpcode() == ISD::TargetGlobalAddress))
+      return false;
+  }
+  // Addresses of the form FI+const or FI|const
+  if (CurDAG->isBaseWithConstantOffset(Addr)) {
+    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+    if (isInt<16>(CN->getSExtValue())) {
+
+      // If the first operand is a frame index, get the TargetFrameIndex node.
+      if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+                                  (Addr.getOperand(0))) {
+        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+        getMips16SPRefReg(Parent, Alias);
+      }
+      else
+        Base = Addr.getOperand(0);
+
+      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
+      return true;
+    }
+  }
+  // Operand is a result from an ADD.
+  if (Addr.getOpcode() == ISD::ADD) {
+    // When loading from constant pools, load the lower address part in
+    // the instruction itself. Example, instead of:
+    //  lui $2, %hi($CPI1_0)
+    //  addiu $2, $2, %lo($CPI1_0)
+    //  lwc1 $f0, 0($2)
+    // Generate:
+    //  lui $2, %hi($CPI1_0)
+    //  lwc1 $f0, %lo($CPI1_0)($2)
+    if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
+        Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
+      SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
+      if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
+          isa<JumpTableSDNode>(Opnd0)) {
+        Base = Addr.getOperand(0);
+        Offset = Opnd0;
+        return true;
+      }
+    }
+
+    // If an indexed floating point load/store can be emitted, return false.
+    const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
+
+    if (LS &&
+        (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
+        Subtarget.hasMips32r2Or64())
+      return false;
+  }
+  Base = Addr;
+  Offset = CurDAG->getTargetConstant(0, ValTy);
+  return true;
+}
+
 /// Select multiply instructions.
 std::pair<SDNode*, SDNode*>
 MipsDAGToDAGISel::SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, EVT Ty,
@@ -371,14 +518,16 @@ MipsDAGToDAGISel::SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, EVT Ty,
   SDValue InFlag = SDValue(Mul, 0);
 
   if (HasLo) {
-    Lo = CurDAG->getMachineNode(Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64, dl,
-                                Ty, MVT::Glue, InFlag);
+    unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mflo16 :
+      (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
+    Lo = CurDAG->getMachineNode(Opcode, dl, Ty, MVT::Glue, InFlag);
     InFlag = SDValue(Lo, 1);
   }
-  if (HasHi)
-    Hi = CurDAG->getMachineNode(Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64, dl,
-                                Ty, InFlag);
-
+  if (HasHi) {
+    unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mfhi16 :
+      (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64);
+    Hi = CurDAG->getMachineNode(Opcode, dl, Ty, InFlag);
+  }
   return std::make_pair(Lo, Hi);
 }
 
@@ -410,6 +559,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
 
   case ISD::SUBE:
   case ISD::ADDE: {
+    bool inMips16Mode = Subtarget.inMips16Mode();
     SDValue InFlag = Node->getOperand(2), CmpLHS;
     unsigned Opc = InFlag.getOpcode(); (void)Opc;
     assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
@@ -419,10 +569,16 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
     unsigned MOp;
     if (Opcode == ISD::ADDE) {
       CmpLHS = InFlag.getValue(0);
-      MOp = Mips::ADDu;
+      if (inMips16Mode)
+        MOp = Mips::AdduRxRyRz16;
+      else
+        MOp = Mips::ADDu;
     } else {
       CmpLHS = InFlag.getOperand(0);
-      MOp = Mips::SUBu;
+      if (inMips16Mode)
+        MOp = Mips::SubuRxRyRz16;
+      else
+        MOp = Mips::SUBu;
     }
 
     SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
@@ -431,8 +587,11 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
     SDValue RHS = Node->getOperand(1);
     EVT VT = LHS.getValueType();
 
-    SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, dl, VT, Ops, 2);
-    SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT,
+
+    unsigned Sltu_op = inMips16Mode? Mips::SltuRxRyRz16: Mips::SLTu;
+    SDNode *Carry = CurDAG->getMachineNode(Sltu_op, dl, VT, Ops, 2);
+    unsigned Addu_op = inMips16Mode? Mips::AdduRxRyRz16 : Mips::ADDu;
+    SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, dl, VT,
                                               SDValue(Carry,0), RHS);
 
     return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue,
@@ -442,8 +601,13 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
   /// Mul with two results
   case ISD::SMUL_LOHI:
   case ISD::UMUL_LOHI: {
-    if (NodeTy == MVT::i32)
-      MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
+    if (NodeTy == MVT::i32) {
+      if (Subtarget.inMips16Mode())
+        MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 :
+                   Mips::MultRxRy16);
+      else
+        MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
+    }
     else
       MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);
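A note on the ADDE/SUBE selection in the hunks above: MIPS has no condition-code register, so the selector recomputes the carry out of the low-word add with an unsigned set-on-less-than (SLTu, or SltuRxRyRz16 in MIPS16 mode) and adds it into the high word. A minimal standalone sketch of the arithmetic being modeled; this is illustrative C++, not code from the patch:

#include <cstdint>

// Model of the SLTu/ADDu sequence emitted for a 64-bit add split across
// two 32-bit registers (the ADDC half feeding the ADDE half).
void add64(uint32_t a_lo, uint32_t a_hi, uint32_t b_lo, uint32_t b_hi,
           uint32_t &r_lo, uint32_t &r_hi) {
  r_lo = a_lo + b_lo;            // ADDu (the ADDC half)
  uint32_t carry = r_lo < a_lo;  // SLTu: 1 iff the low-word add wrapped
  r_hi = a_hi + b_hi + carry;    // ADDu + ADDu (the ADDE half)
}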
@@ -469,8 +633,13 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
   }
   case ISD::MULHS:
   case ISD::MULHU: {
-    if (NodeTy == MVT::i32)
-      MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
+    if (NodeTy == MVT::i32) {
+      if (Subtarget.inMips16Mode())
+        MultOpc = (Opcode == ISD::MULHU ?
+                   Mips::MultuRxRy16 : Mips::MultRxRy16);
+      else
+        MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
+    }
+    else
+      MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);
@@ -539,6 +708,15 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
     return RegOpnd;
   }
 
+#ifndef NDEBUG
+  case ISD::LOAD:
+  case ISD::STORE:
+    assert(cast<MemSDNode>(Node)->getMemoryVT().getSizeInBits() / 8 <=
+           cast<MemSDNode>(Node)->getAlignment() &&
+           "Unexpected unaligned loads/stores.");
+    break;
+#endif
+
   case MipsISD::ThreadPointer: {
     EVT PtrVT = TLI.getPointerTy();
     unsigned RdhwrOpc, SrcReg, DestReg;
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
index c5207c6..e225b6c 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -25,6 +25,7 @@
 #include "llvm/GlobalVariable.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/CallingConv.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -32,12 +33,33 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
+STATISTIC(NumTailCalls, "Number of tail calls");
+
+static cl::opt<bool>
+EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
+                    cl::desc("MIPS: Enable tail calls."), cl::init(false));
+
+static const uint16_t O32IntRegs[4] = {
+  Mips::A0, Mips::A1, Mips::A2, Mips::A3
+};
+
+static const uint16_t Mips64IntRegs[8] = {
+  Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64,
+  Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64
+};
+
+static const uint16_t Mips64DPRegs[8] = {
+  Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
+  Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64
+};
+
 // If I is a shifted mask, set the size (Size) and the first bit of the
 // mask (Pos), and return true.
 // For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
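The shifted-mask helper that the comment above describes can be sketched in isolation as follows; this is an illustrative reimplementation under the stated semantics, not the function from the patch:

#include <cstdint>

// Returns true if I is a single contiguous run of set bits, reporting the
// run's first bit (Pos) and its length (Size); 0x003ff800 yields (11, 11).
static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
  if (I == 0)
    return false;
  Pos = 0;
  while (!(I & 1)) { I >>= 1; ++Pos; }  // skip the trailing zeros
  Size = 0;
  while (I & 1) { I >>= 1; ++Size; }    // count the run of ones
  return I == 0;                        // any leftover bit breaks the mask
}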
@@ -58,6 +80,7 @@ static SDValue GetGlobalReg(SelectionDAG &DAG, EVT Ty) {
 const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch (Opcode) {
   case MipsISD::JmpLink: return "MipsISD::JmpLink";
+  case MipsISD::TailCall: return "MipsISD::TailCall";
   case MipsISD::Hi: return "MipsISD::Hi";
   case MipsISD::Lo: return "MipsISD::Lo";
   case MipsISD::GPRel: return "MipsISD::GPRel";
@@ -89,6 +112,20 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case MipsISD::LDR: return "MipsISD::LDR";
   case MipsISD::SDL: return "MipsISD::SDL";
   case MipsISD::SDR: return "MipsISD::SDR";
+  case MipsISD::EXTP: return "MipsISD::EXTP";
+  case MipsISD::EXTPDP: return "MipsISD::EXTPDP";
+  case MipsISD::EXTR_S_H: return "MipsISD::EXTR_S_H";
+  case MipsISD::EXTR_W: return "MipsISD::EXTR_W";
+  case MipsISD::EXTR_R_W: return "MipsISD::EXTR_R_W";
+  case MipsISD::EXTR_RS_W: return "MipsISD::EXTR_RS_W";
+  case MipsISD::SHILO: return "MipsISD::SHILO";
+  case MipsISD::MTHLIP: return "MipsISD::MTHLIP";
+  case MipsISD::MULT: return "MipsISD::MULT";
+  case MipsISD::MULTU: return "MipsISD::MULTU";
+  case MipsISD::MADD_DSP: return "MipsISD::MADD_DSP";
+  case MipsISD::MADDU_DSP: return "MipsISD::MADDU_DSP";
+  case MipsISD::MSUB_DSP: return "MipsISD::MSUB_DSP";
+  case MipsISD::MSUBU_DSP: return "MipsISD::MSUBU_DSP";
   default: return NULL;
   }
 }
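The constructor hunk that follows registers the new DSP vector types on a register class and then marks every generic operation Expand before whitelisting the few the DSP ASE supports natively. A reduced sketch of that "expand everything, then whitelist" idiom, written as a hypothetical member of a TargetLowering subclass against the LLVM 3.2-era API (the class and function names are invented for illustration):

// Hypothetical helper inside a TargetLowering subclass, mirroring the
// per-vector-type loop in the hunk below.
void MyTargetLowering::markDSPVectorType(MVT::SimpleValueType VT) {
  addRegisterClass(VT, &Mips::DSPRegsRegClass);

  // Default every generic opcode to Expand so nothing is legal by accident.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, VT, Expand);

  // Whitelist what the DSP register class can carry natively.
  setOperationAction(ISD::LOAD, VT, Legal);
  setOperationAction(ISD::STORE, VT, Legal);
  setOperationAction(ISD::BITCAST, VT, Legal);
}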
@@ -113,7 +150,22 @@ MipsTargetLowering(MipsTargetMachine &TM)
 
   if (Subtarget->inMips16Mode()) {
     addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
-    addRegisterClass(MVT::i32, &Mips::CPURARegRegClass);
+  }
+
+  if (Subtarget->hasDSP()) {
+    MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
+
+    for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
+      addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass);
+
+      // Expand all builtin opcodes.
+      for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
+        setOperationAction(Opc, VecTys[i], Expand);
+
+      setOperationAction(ISD::LOAD, VecTys[i], Legal);
+      setOperationAction(ISD::STORE, VecTys[i], Legal);
+      setOperationAction(ISD::BITCAST, VecTys[i], Legal);
+    }
   }
 
   if (!TM.Options.UseSoftFloat) {
@@ -160,10 +212,18 @@ MipsTargetLowering(MipsTargetMachine &TM)
   setOperationAction(ISD::VASTART, MVT::Other, Custom);
   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
-  setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
-  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
-  setOperationAction(ISD::LOAD, MVT::i32, Custom);
-  setOperationAction(ISD::STORE, MVT::i32, Custom);
+  if (Subtarget->inMips16Mode()) {
+    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+  }
+  else {
+    setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+  }
+  if (!Subtarget->inMips16Mode()) {
+    setOperationAction(ISD::LOAD, MVT::i32, Custom);
+    setOperationAction(ISD::STORE, MVT::i32, Custom);
+  }
 
   if (!TM.Options.NoNaNsFPMath) {
     setOperationAction(ISD::FABS, MVT::f32, Custom);
@@ -187,6 +247,10 @@ MipsTargetLowering(MipsTargetMachine &TM)
     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
   }
 
+  setOperationAction(ISD::ADD, MVT::i32, Custom);
+  if (HasMips64)
+    setOperationAction(ISD::ADD, MVT::i64, Custom);
+
   setOperationAction(ISD::SDIV, MVT::i32, Expand);
   setOperationAction(ISD::SREM, MVT::i32, Expand);
   setOperationAction(ISD::UDIV, MVT::i32, Expand);
@@ -254,6 +318,9 @@ MipsTargetLowering(MipsTargetMachine &TM)
   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
   setOperationAction(ISD::VAEND, MVT::Other, Expand);
 
+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
+
   // Use the default for now
   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
@@ -263,6 +330,21 @@ MipsTargetLowering(MipsTargetMachine &TM)
   setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
   setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
 
+  if (Subtarget->inMips16Mode()) {
+    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+  }
+
   setInsertFencesForAtomic(true);
 
   if (!Subtarget->hasSEInReg()) {
@@ -310,6 +392,9 @@ MipsTargetLowering(MipsTargetMachine &TM)
 bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
   MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
 
+  if (Subtarget->inMips16Mode())
+    return false;
+
   switch (SVT) {
   case MVT::i64:
   case MVT::i32:
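With allowsUnalignedMemoryAccesses now returning false in MIPS16 mode (above), the target-independent legalizer rewrites a misaligned access into smaller aligned pieces instead of emitting a single load. A portable C++ model of the byte-wise expansion it falls back to; illustrative only, not code from LLVM:

#include <cstdint>

// Byte-wise assembly of a little-endian 32-bit value: roughly the shape of
// code produced once unaligned i32 loads are rejected by the target hook.
uint32_t loadUnalignedLE32(const uint8_t *P) {
  return uint32_t(P[0]) |
         (uint32_t(P[1]) << 8) |
         (uint32_t(P[2]) << 16) |
         (uint32_t(P[3]) << 24);
}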
@@ -785,6 +870,26 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
   return SDValue();
 }
 
+void
+MipsTargetLowering::LowerOperationWrapper(SDNode *N,
+                                          SmallVectorImpl<SDValue> &Results,
+                                          SelectionDAG &DAG) const {
+  SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+
+  for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)
+    Results.push_back(Res.getValue(I));
+}
+
+void
+MipsTargetLowering::ReplaceNodeResults(SDNode *N,
+                                       SmallVectorImpl<SDValue> &Results,
+                                       SelectionDAG &DAG) const {
+  SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+
+  for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)
+    Results.push_back(Res.getValue(I));
+}
+
 SDValue MipsTargetLowering::
 LowerOperation(SDValue Op, SelectionDAG &DAG) const
 {
@@ -811,6 +916,9 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
   case ISD::SRL_PARTS: return LowerShiftRightParts(Op, DAG, false);
   case ISD::LOAD: return LowerLOAD(Op, DAG);
   case ISD::STORE: return LowerSTORE(Op, DAG);
+  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+  case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
+  case ISD::ADD: return LowerADD(Op, DAG);
   }
   return SDValue();
 }
@@ -919,6 +1027,70 @@ static MachineBasicBlock* ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB,
   return BB;
 }
 */
+
+MachineBasicBlock *
+MipsTargetLowering::EmitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
+  // $bb:
+  //  bposge32_pseudo $vr0
+  //  =>
+  // $bb:
+  //  bposge32 $tbb
+  // $fbb:
+  //  li $vr2, 0
+  //  b $sink
+  // $tbb:
+  //  li $vr1, 1
+  // $sink:
+  //  $vr0 = phi($vr2, $fbb, $vr1, $tbb)
+
+  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  const TargetRegisterClass *RC = &Mips::CPURegsRegClass;
+  DebugLoc DL = MI->getDebugLoc();
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
+  MachineFunction *F = BB->getParent();
+  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
+  F->insert(It, FBB);
+  F->insert(It, TBB);
+  F->insert(It, Sink);
+
+  // Transfer the remainder of BB and its successor edges to Sink.
+  Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
+               BB->end());
+  Sink->transferSuccessorsAndUpdatePHIs(BB);
+
+  // Add successors.
+  BB->addSuccessor(FBB);
+  BB->addSuccessor(TBB);
+  FBB->addSuccessor(Sink);
+  TBB->addSuccessor(Sink);
+
+  // Insert the real bposge32 instruction to $BB.
+  BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
+
+  // Fill $FBB.
+  unsigned VR2 = RegInfo.createVirtualRegister(RC);
+  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
+    .addReg(Mips::ZERO).addImm(0);
+  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
+
+  // Fill $TBB.
+  unsigned VR1 = RegInfo.createVirtualRegister(RC);
+  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
+    .addReg(Mips::ZERO).addImm(1);
+
+  // Insert phi function to $Sink.
+  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
+          MI->getOperand(0).getReg())
+    .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);
+
+  MI->eraseFromParent();   // The pseudo instruction is gone now.
+ return Sink; +} + MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -1027,6 +1199,8 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case Mips::ATOMIC_CMP_SWAP_I64: case Mips::ATOMIC_CMP_SWAP_I64_P8: return EmitAtomicCmpSwap(MI, BB, 8); + case Mips::BPOSGE32_PSEUDO: + return EmitBPOSGE32(MI, BB); } } @@ -1571,15 +1745,16 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) { SDVTList VTs = DAG.getVTList(MVT::i32); - MipsTargetObjectFile &TLOF = (MipsTargetObjectFile&)getObjFileLowering(); + const MipsTargetObjectFile &TLOF = + (const MipsTargetObjectFile&)getObjFileLowering(); // %gp_rel relocation if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) { SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, MipsII::MO_GPREL); SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, &GA, 1); - SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32); - return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode); + SDValue GPReg = DAG.getRegister(Mips::GP, MVT::i32); + return DAG.getNode(ISD::ADD, dl, MVT::i32, GPReg, GPRelNode); } // %hi/%lo relocation SDValue GAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, @@ -1620,8 +1795,10 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) { // %hi/%lo relocation - SDValue BAHi = DAG.getBlockAddress(BA, MVT::i32, true, MipsII::MO_ABS_HI); - SDValue BALo = DAG.getBlockAddress(BA, MVT::i32, true, MipsII::MO_ABS_LO); + SDValue BAHi = + DAG.getTargetBlockAddress(BA, MVT::i32, 0, MipsII::MO_ABS_HI); + SDValue BALo = + DAG.getTargetBlockAddress(BA, MVT::i32, 0, MipsII::MO_ABS_LO); SDValue Hi = DAG.getNode(MipsISD::Hi, dl, MVT::i32, BAHi); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALo); return DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo); @@ -1630,10 +1807,10 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, EVT ValTy = Op.getValueType(); unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; unsigned OFSTFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; - SDValue BAGOTOffset = DAG.getBlockAddress(BA, ValTy, true, GOTFlag); + SDValue BAGOTOffset = DAG.getTargetBlockAddress(BA, ValTy, 0, GOTFlag); BAGOTOffset = DAG.getNode(MipsISD::Wrapper, dl, ValTy, GetGlobalReg(DAG, ValTy), BAGOTOffset); - SDValue BALOOffset = DAG.getBlockAddress(BA, ValTy, true, OFSTFlag); + SDValue BALOOffset = DAG.getTargetBlockAddress(BA, ValTy, 0, OFSTFlag); SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), BAGOTOffset, MachinePointerInfo(), false, false, false, 0); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, BALOOffset); @@ -2224,6 +2401,172 @@ SDValue MipsTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { return CreateStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7); } +// This function expands mips intrinsic nodes which have 64-bit input operands +// or output values. +// +// out64 = intrinsic-node in64 +// => +// lo = copy (extract-element (in64, 0)) +// hi = copy (extract-element (in64, 1)) +// mips-specific-node +// v0 = copy lo +// v1 = copy hi +// out64 = merge-values (v0, v1) +// +static SDValue LowerDSPIntr(SDValue Op, SelectionDAG &DAG, + unsigned Opc, bool HasI64In, bool HasI64Out) { + DebugLoc DL = Op.getDebugLoc(); + bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other; + SDValue Chain = HasChainIn ? 
Op->getOperand(0) : DAG.getEntryNode(); + SmallVector<SDValue, 3> Ops; + + if (HasI64In) { + SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, + Op->getOperand(1 + HasChainIn), + DAG.getConstant(0, MVT::i32)); + SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, + Op->getOperand(1 + HasChainIn), + DAG.getConstant(1, MVT::i32)); + + Chain = DAG.getCopyToReg(Chain, DL, Mips::LO, InLo, SDValue()); + Chain = DAG.getCopyToReg(Chain, DL, Mips::HI, InHi, Chain.getValue(1)); + + Ops.push_back(Chain); + Ops.append(Op->op_begin() + HasChainIn + 2, Op->op_end()); + Ops.push_back(Chain.getValue(1)); + } else { + Ops.push_back(Chain); + Ops.append(Op->op_begin() + HasChainIn + 1, Op->op_end()); + } + + if (!HasI64Out) + return DAG.getNode(Opc, DL, Op->value_begin(), Op->getNumValues(), + Ops.begin(), Ops.size()); + + SDValue Intr = DAG.getNode(Opc, DL, DAG.getVTList(MVT::Other, MVT::Glue), + Ops.begin(), Ops.size()); + SDValue OutLo = DAG.getCopyFromReg(Intr.getValue(0), DL, Mips::LO, MVT::i32, + Intr.getValue(1)); + SDValue OutHi = DAG.getCopyFromReg(OutLo.getValue(1), DL, Mips::HI, MVT::i32, + OutLo.getValue(2)); + SDValue Out = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, OutLo, OutHi); + + if (!HasChainIn) + return Out; + + SDValue Vals[] = { Out, OutHi.getValue(1) }; + return DAG.getMergeValues(Vals, 2, DL); +} + +SDValue MipsTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) { + default: + return SDValue(); + case Intrinsic::mips_shilo: + return LowerDSPIntr(Op, DAG, MipsISD::SHILO, true, true); + case Intrinsic::mips_dpau_h_qbl: + return LowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL, true, true); + case Intrinsic::mips_dpau_h_qbr: + return LowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR, true, true); + case Intrinsic::mips_dpsu_h_qbl: + return LowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL, true, true); + case Intrinsic::mips_dpsu_h_qbr: + return LowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR, true, true); + case Intrinsic::mips_dpa_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH, true, true); + case Intrinsic::mips_dps_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH, true, true); + case Intrinsic::mips_dpax_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH, true, true); + case Intrinsic::mips_dpsx_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH, true, true); + case Intrinsic::mips_mulsa_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH, true, true); + case Intrinsic::mips_mult: + return LowerDSPIntr(Op, DAG, MipsISD::MULT, false, true); + case Intrinsic::mips_multu: + return LowerDSPIntr(Op, DAG, MipsISD::MULTU, false, true); + case Intrinsic::mips_madd: + return LowerDSPIntr(Op, DAG, MipsISD::MADD_DSP, true, true); + case Intrinsic::mips_maddu: + return LowerDSPIntr(Op, DAG, MipsISD::MADDU_DSP, true, true); + case Intrinsic::mips_msub: + return LowerDSPIntr(Op, DAG, MipsISD::MSUB_DSP, true, true); + case Intrinsic::mips_msubu: + return LowerDSPIntr(Op, DAG, MipsISD::MSUBU_DSP, true, true); + } +} + +SDValue MipsTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) { + default: + return SDValue(); + case Intrinsic::mips_extp: + return LowerDSPIntr(Op, DAG, MipsISD::EXTP, true, false); + case Intrinsic::mips_extpdp: + return LowerDSPIntr(Op, DAG, MipsISD::EXTPDP, true, false); + case Intrinsic::mips_extr_w: + return LowerDSPIntr(Op, DAG, MipsISD::EXTR_W, true, false); + 
case Intrinsic::mips_extr_r_w: + return LowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W, true, false); + case Intrinsic::mips_extr_rs_w: + return LowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W, true, false); + case Intrinsic::mips_extr_s_h: + return LowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H, true, false); + case Intrinsic::mips_mthlip: + return LowerDSPIntr(Op, DAG, MipsISD::MTHLIP, true, true); + case Intrinsic::mips_mulsaq_s_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH, true, true); + case Intrinsic::mips_maq_s_w_phl: + return LowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL, true, true); + case Intrinsic::mips_maq_s_w_phr: + return LowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR, true, true); + case Intrinsic::mips_maq_sa_w_phl: + return LowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL, true, true); + case Intrinsic::mips_maq_sa_w_phr: + return LowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR, true, true); + case Intrinsic::mips_dpaq_s_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH, true, true); + case Intrinsic::mips_dpsq_s_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH, true, true); + case Intrinsic::mips_dpaq_sa_l_w: + return LowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W, true, true); + case Intrinsic::mips_dpsq_sa_l_w: + return LowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W, true, true); + case Intrinsic::mips_dpaqx_s_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH, true, true); + case Intrinsic::mips_dpaqx_sa_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH, true, true); + case Intrinsic::mips_dpsqx_s_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH, true, true); + case Intrinsic::mips_dpsqx_sa_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH, true, true); + } +} + +SDValue MipsTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const { + if (Op->getOperand(0).getOpcode() != ISD::FRAMEADDR + || cast<ConstantSDNode> + (Op->getOperand(0).getOperand(0))->getZExtValue() != 0 + || Op->getOperand(1).getOpcode() != ISD::FRAME_TO_ARGS_OFFSET) + return SDValue(); + + // The pattern + // (add (frameaddr 0), (frame_to_args_offset)) + // results from lowering llvm.eh.dwarf.cfa intrinsic. Transform it to + // (add FrameObject, 0) + // where FrameObject is a fixed StackObject with offset 0 which points to + // the old stack pointer. + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + EVT ValTy = Op->getValueType(0); + int FI = MFI->CreateFixedObject(Op.getValueSizeInBits() / 8, 0, false); + SDValue InArgsAddr = DAG.getFrameIndex(FI, ValTy); + return DAG.getNode(ISD::ADD, Op->getDebugLoc(), ValTy, InArgsAddr, + DAG.getConstant(0, ValTy)); +} + //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -2259,16 +2602,9 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, Mips::D6, Mips::D7 }; - // ByVal Args - if (ArgFlags.isByVal()) { - State.HandleByVal(ValNo, ValVT, LocVT, LocInfo, - 1 /*MinSize*/, 4 /*MinAlign*/, ArgFlags); - unsigned NextReg = (State.getNextStackOffset() + 3) / 4; - for (unsigned r = State.getFirstUnallocated(IntRegs, IntRegsSize); - r < std::min(IntRegsSize, NextReg); ++r) - State.AllocateReg(IntRegs[r]); - return false; - } + // Do not process byval args here. 
+ if (ArgFlags.isByVal()) + return true; // Promote i8 and i16 if (LocVT == MVT::i8 || LocVT == MVT::i16) { @@ -2323,279 +2659,72 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, } else llvm_unreachable("Cannot handle this ValVT."); - unsigned SizeInBytes = ValVT.getSizeInBits() >> 3; - unsigned Offset = State.AllocateStack(SizeInBytes, OrigAlign); - - if (!Reg) + if (!Reg) { + unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() >> 3, + OrigAlign); State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - else + } else State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - return false; // CC must always match -} - -static const uint16_t Mips64IntRegs[8] = - {Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64, - Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64}; -static const uint16_t Mips64DPRegs[8] = - {Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64, - Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64}; - -static bool CC_Mips64Byval(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - unsigned Align = std::max(ArgFlags.getByValAlign(), (unsigned)8); - unsigned Size = (ArgFlags.getByValSize() + 7) / 8 * 8; - unsigned FirstIdx = State.getFirstUnallocated(Mips64IntRegs, 8); - - assert(Align <= 16 && "Cannot handle alignments larger than 16."); - - // If byval is 16-byte aligned, the first arg register must be even. - if ((Align == 16) && (FirstIdx % 2)) { - State.AllocateReg(Mips64IntRegs[FirstIdx], Mips64DPRegs[FirstIdx]); - ++FirstIdx; - } - - // Mark the registers allocated. - for (unsigned I = FirstIdx; Size && (I < 8); Size -= 8, ++I) - State.AllocateReg(Mips64IntRegs[I], Mips64DPRegs[I]); - - // Allocate space on caller's stack. - unsigned Offset = State.AllocateStack(Size, Align); - - if (FirstIdx < 8) - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Mips64IntRegs[FirstIdx], - LocVT, LocInfo)); - else - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - - return true; + return false; } #include "MipsGenCallingConv.inc" -static void -AnalyzeMips64CallOperands(CCState &CCInfo, - const SmallVectorImpl<ISD::OutputArg> &Outs) { - unsigned NumOps = Outs.size(); - for (unsigned i = 0; i != NumOps; ++i) { - MVT ArgVT = Outs[i].VT; - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - bool R; - - if (Outs[i].IsFixed) - R = CC_MipsN(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); - else - R = CC_MipsN_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); - - if (R) { -#ifndef NDEBUG - dbgs() << "Call operand #" << i << " has unhandled type " - << EVT(ArgVT).getEVTString(); -#endif - llvm_unreachable(0); - } - } -} - //===----------------------------------------------------------------------===// // Call Calling Convention Implementation //===----------------------------------------------------------------------===// static const unsigned O32IntRegsSize = 4; -static const uint16_t O32IntRegs[] = { - Mips::A0, Mips::A1, Mips::A2, Mips::A3 -}; - // Return next O32 integer argument register. static unsigned getNextIntArgReg(unsigned Reg) { assert((Reg == Mips::A0) || (Reg == Mips::A2)); return (Reg == Mips::A0) ? Mips::A1 : Mips::A3; } -// Write ByVal Arg to arg registers and stack. 
-static void -WriteByValArg(SDValue Chain, DebugLoc dl, - SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, - SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr, - MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, - const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, - MVT PtrType, bool isLittle) { - unsigned LocMemOffset = VA.getLocMemOffset(); - unsigned Offset = 0; - uint32_t RemainingSize = Flags.getByValSize(); - unsigned ByValAlign = Flags.getByValAlign(); - - // Copy the first 4 words of byval arg to registers A0 - A3. - // FIXME: Use a stricter alignment if it enables better optimization in passes - // run later. - for (; RemainingSize >= 4 && LocMemOffset < 4 * 4; - Offset += 4, RemainingSize -= 4, LocMemOffset += 4) { - SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, - DAG.getConstant(Offset, MVT::i32)); - SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr, - MachinePointerInfo(), false, false, false, - std::min(ByValAlign, (unsigned )4)); - MemOpChains.push_back(LoadVal.getValue(1)); - unsigned DstReg = O32IntRegs[LocMemOffset / 4]; - RegsToPass.push_back(std::make_pair(DstReg, LoadVal)); - } - - if (RemainingSize == 0) - return; +/// IsEligibleForTailCallOptimization - Check whether the call is eligible +/// for tail call optimization. +bool MipsTargetLowering:: +IsEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const { + if (!EnableMipsTailCalls) + return false; - // If there still is a register available for argument passing, write the - // remaining part of the structure to it using subword loads and shifts. - if (LocMemOffset < 4 * 4) { - assert(RemainingSize <= 3 && RemainingSize >= 1 && - "There must be one to three bytes remaining."); - unsigned LoadSize = (RemainingSize == 3 ? 2 : RemainingSize); - SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, - DAG.getConstant(Offset, MVT::i32)); - unsigned Alignment = std::min(ByValAlign, (unsigned )4); - SDValue LoadVal = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, - LoadPtr, MachinePointerInfo(), - MVT::getIntegerVT(LoadSize * 8), false, - false, Alignment); - MemOpChains.push_back(LoadVal.getValue(1)); - - // If target is big endian, shift it to the most significant half-word or - // byte. - if (!isLittle) - LoadVal = DAG.getNode(ISD::SHL, dl, MVT::i32, LoadVal, - DAG.getConstant(32 - LoadSize * 8, MVT::i32)); - - Offset += LoadSize; - RemainingSize -= LoadSize; - - // Read second subword if necessary. - if (RemainingSize != 0) { - assert(RemainingSize == 1 && "There must be one byte remaining."); - LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, - DAG.getConstant(Offset, MVT::i32)); - unsigned Alignment = std::min(ByValAlign, (unsigned )2); - SDValue Subword = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, - LoadPtr, MachinePointerInfo(), - MVT::i8, false, false, Alignment); - MemOpChains.push_back(Subword.getValue(1)); - // Insert the loaded byte to LoadVal. - // FIXME: Use INS if supported by target. - unsigned ShiftAmt = isLittle ? 16 : 8; - SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i32, Subword, - DAG.getConstant(ShiftAmt, MVT::i32)); - LoadVal = DAG.getNode(ISD::OR, dl, MVT::i32, LoadVal, Shift); - } + // No tail call optimization for mips16. + if (Subtarget->inMips16Mode()) + return false; - unsigned DstReg = O32IntRegs[LocMemOffset / 4]; - RegsToPass.push_back(std::make_pair(DstReg, LoadVal)); - return; - } + // Return false if either the callee or caller has a byval argument. 
+ if (MipsCCInfo.hasByValArg() || FI.hasByvalArg()) + return false; - // Copy remaining part of byval arg using memcpy. - SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, - DAG.getConstant(Offset, MVT::i32)); - SDValue Dst = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, - DAG.getIntPtrConstant(LocMemOffset)); - Chain = DAG.getMemcpy(Chain, dl, Dst, Src, - DAG.getConstant(RemainingSize, MVT::i32), - std::min(ByValAlign, (unsigned)4), - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); - MemOpChains.push_back(Chain); + // Return true if the callee's argument area is no larger than the + // caller's. + return NextStackOffset <= FI.getIncomingArgSize(); } -// Copy Mips64 byVal arg to registers and stack. -void static -PassByValArg64(SDValue Chain, DebugLoc dl, - SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, - SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr, - MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, - const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, - EVT PtrTy, bool isLittle) { - unsigned ByValSize = Flags.getByValSize(); - unsigned Alignment = std::min(Flags.getByValAlign(), (unsigned)8); - bool IsRegLoc = VA.isRegLoc(); - unsigned Offset = 0; // Offset in # of bytes from the beginning of struct. - unsigned LocMemOffset = 0; - unsigned MemCpySize = ByValSize; - - if (!IsRegLoc) - LocMemOffset = VA.getLocMemOffset(); - else { - const uint16_t *Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8, - VA.getLocReg()); - const uint16_t *RegEnd = Mips64IntRegs + 8; - - // Copy double words to registers. - for (; (Reg != RegEnd) && (ByValSize >= Offset + 8); ++Reg, Offset += 8) { - SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, - DAG.getConstant(Offset, PtrTy)); - SDValue LoadVal = DAG.getLoad(MVT::i64, dl, Chain, LoadPtr, - MachinePointerInfo(), false, false, false, - Alignment); - MemOpChains.push_back(LoadVal.getValue(1)); - RegsToPass.push_back(std::make_pair(*Reg, LoadVal)); - } - - // Return if the struct has been fully copied. - if (!(MemCpySize = ByValSize - Offset)) - return; - - // If there is an argument register available, copy the remainder of the - // byval argument with sub-doubleword loads and shifts. - if (Reg != RegEnd) { - assert((ByValSize < Offset + 8) && - "Size of the remainder should be smaller than 8-byte."); - SDValue Val; - for (unsigned LoadSize = 4; Offset < ByValSize; LoadSize /= 2) { - unsigned RemSize = ByValSize - Offset; - - if (RemSize < LoadSize) - continue; - - SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, - DAG.getConstant(Offset, PtrTy)); - SDValue LoadVal = - DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i64, Chain, LoadPtr, - MachinePointerInfo(), MVT::getIntegerVT(LoadSize * 8), - false, false, Alignment); - MemOpChains.push_back(LoadVal.getValue(1)); - - // Offset in number of bits from double word boundary. - unsigned OffsetDW = (Offset % 8) * 8; - unsigned Shamt = isLittle ? OffsetDW : 64 - (OffsetDW + LoadSize * 8); - SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i64, LoadVal, - DAG.getConstant(Shamt, MVT::i32)); - - Val = Val.getNode() ? 
DAG.getNode(ISD::OR, dl, MVT::i64, Val, Shift) : - Shift; - Offset += LoadSize; - Alignment = std::min(Alignment, LoadSize); - } - - RegsToPass.push_back(std::make_pair(*Reg, Val)); - return; - } +SDValue +MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, + SDValue Chain, SDValue Arg, DebugLoc DL, + bool IsTailCall, SelectionDAG &DAG) const { + if (!IsTailCall) { + SDValue PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, + DAG.getIntPtrConstant(Offset)); + return DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo(), false, + false, 0); } - assert(MemCpySize && "MemCpySize must not be zero."); - - // Copy remainder of byval arg to it with memcpy. - SDValue Src = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, - DAG.getConstant(Offset, PtrTy)); - SDValue Dst = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, - DAG.getIntPtrConstant(LocMemOffset)); - Chain = DAG.getMemcpy(Chain, dl, Dst, Src, - DAG.getConstant(MemCpySize, PtrTy), Alignment, - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); - MemOpChains.push_back(Chain); + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + int FI = MFI->CreateFixedObject(Arg.getValueSizeInBits() / 8, Offset, false); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + return DAG.getStore(Chain, DL, Arg, FIN, MachinePointerInfo(), + /*isVolatile=*/ true, false, 0); } /// LowerCall - functions arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. -/// TODO: isTailCall. SDValue MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { @@ -2610,56 +2739,49 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; - // MIPs target does not yet support tail call optimization. - isTailCall = false; - MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering(); bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); + MipsCC MipsCCInfo(CallConv, isVarArg, IsO32, CCInfo); - if (CallConv == CallingConv::Fast) - CCInfo.AnalyzeCallOperands(Outs, CC_Mips_FastCC); - else if (IsO32) - CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32); - else if (HasMips64) - AnalyzeMips64CallOperands(CCInfo, Outs); - else - CCInfo.AnalyzeCallOperands(Outs, CC_Mips); + MipsCCInfo.analyzeCallOperands(Outs); // Get a count of how many bytes are to be pushed on the stack. unsigned NextStackOffset = CCInfo.getNextStackOffset(); - unsigned StackAlignment = TFL->getStackAlignment(); - NextStackOffset = RoundUpToAlignment(NextStackOffset, StackAlignment); - // Update size of the maximum argument space. - // For O32, a minimum of four words (16 bytes) of argument space is - // allocated. - if (IsO32 && (CallConv != CallingConv::Fast)) - NextStackOffset = std::max(NextStackOffset, (unsigned)16); + // Check if it's really possible to do a tail call. 
+ if (isTailCall) + isTailCall = + IsEligibleForTailCallOptimization(MipsCCInfo, NextStackOffset, + *MF.getInfo<MipsFunctionInfo>()); + + if (isTailCall) + ++NumTailCalls; // Chain is the output chain of the last Load/Store or CopyToReg node. // ByValChain is the output chain of the last Memcpy node created for copying // byval arguments to the stack. + unsigned StackAlignment = TFL->getStackAlignment(); + NextStackOffset = RoundUpToAlignment(NextStackOffset, StackAlignment); SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true); - Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal); + + if (!isTailCall) + Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal); SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, IsN64 ? Mips::SP_64 : Mips::SP, getPointerTy()); - if (MipsFI->getMaxCallFrameSize() < NextStackOffset) - MipsFI->setMaxCallFrameSize(NextStackOffset); - // With EABI is it possible to have 16 args on registers. SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass; SmallVector<SDValue, 8> MemOpChains; + MipsCC::byval_iterator ByValArg = MipsCCInfo.byval_begin(); // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { @@ -2672,14 +2794,12 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (Flags.isByVal()) { assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); - if (IsO32) - WriteByValArg(Chain, dl, RegsToPass, MemOpChains, StackPtr, - MFI, DAG, Arg, VA, Flags, getPointerTy(), - Subtarget->isLittle()); - else - PassByValArg64(Chain, dl, RegsToPass, MemOpChains, StackPtr, - MFI, DAG, Arg, VA, Flags, getPointerTy(), - Subtarget->isLittle()); + assert(ByValArg != MipsCCInfo.byval_end()); + assert(!isTailCall && + "Do not tail-call optimize if there is a byval argument."); + passByValArg(Chain, dl, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg, + MipsCCInfo, *ByValArg, Flags, Subtarget->isLittle()); + ++ByValArg; continue; } @@ -2729,10 +2849,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // emit ISD::STORE whichs stores the // parameter value to a stack Location - SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, - DAG.getIntPtrConstant(VA.getLocMemOffset())); - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - MachinePointerInfo(), false, false, 0)); + MemOpChains.push_back(passArgOnStack(StackPtr, VA.getLocMemOffset(), + Chain, Arg, dl, isTailCall, DAG)); } // Transform all store nodes into one single node because all store @@ -2861,6 +2979,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (InFlag.getNode()) Ops.push_back(InFlag); + if (isTailCall) + return DAG.getNode(MipsISD::TailCall, dl, MVT::Other, &Ops[0], Ops.size()); + Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); @@ -2904,70 +3025,6 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, //===----------------------------------------------------------------------===// // Formal Arguments Calling Convention Implementation //===----------------------------------------------------------------------===// -static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl, - std::vector<SDValue> &OutChains, - SelectionDAG &DAG, unsigned NumWords, SDValue FIN, - const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, - const Argument *FuncArg) { - unsigned LocMem = VA.getLocMemOffset(); - unsigned FirstWord = LocMem / 4; 
- - // copy register A0 - A3 to frame object - for (unsigned i = 0; i < NumWords; ++i) { - unsigned CurWord = FirstWord + i; - if (CurWord >= O32IntRegsSize) - break; - - unsigned SrcReg = O32IntRegs[CurWord]; - unsigned Reg = AddLiveIn(MF, SrcReg, &Mips::CPURegsRegClass); - SDValue StorePtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIN, - DAG.getConstant(i * 4, MVT::i32)); - SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(Reg, MVT::i32), - StorePtr, MachinePointerInfo(FuncArg, i * 4), - false, false, 0); - OutChains.push_back(Store); - } -} - -// Create frame object on stack and copy registers used for byval passing to it. -static unsigned -CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl, - std::vector<SDValue> &OutChains, SelectionDAG &DAG, - const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, - MachineFrameInfo *MFI, bool IsRegLoc, - SmallVectorImpl<SDValue> &InVals, MipsFunctionInfo *MipsFI, - EVT PtrTy, const Argument *FuncArg) { - const uint16_t *Reg = Mips64IntRegs + 8; - int FOOffset; // Frame object offset from virtual frame pointer. - - if (IsRegLoc) { - Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8, VA.getLocReg()); - FOOffset = (Reg - Mips64IntRegs) * 8 - 8 * 8; - } - else - FOOffset = VA.getLocMemOffset(); - - // Create frame object. - unsigned NumRegs = (Flags.getByValSize() + 7) / 8; - unsigned LastFI = MFI->CreateFixedObject(NumRegs * 8, FOOffset, true); - SDValue FIN = DAG.getFrameIndex(LastFI, PtrTy); - InVals.push_back(FIN); - - // Copy arg registers. - for (unsigned I = 0; (Reg != Mips64IntRegs + 8) && (I < NumRegs); - ++Reg, ++I) { - unsigned VReg = AddLiveIn(MF, *Reg, &Mips::CPU64RegsRegClass); - SDValue StorePtr = DAG.getNode(ISD::ADD, dl, PtrTy, FIN, - DAG.getConstant(I * 8, PtrTy)); - SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(VReg, MVT::i64), - StorePtr, MachinePointerInfo(FuncArg, I * 8), - false, false, 0); - OutChains.push_back(Store); - } - - return LastFI; -} - /// LowerFormalArguments - transform physical registers into virtual registers /// and generate load operations for arguments places on the stack. SDValue @@ -2991,20 +3048,21 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); + MipsCC MipsCCInfo(CallConv, isVarArg, IsO32, CCInfo); - if (CallConv == CallingConv::Fast) - CCInfo.AnalyzeFormalArguments(Ins, CC_Mips_FastCC); - else if (IsO32) - CCInfo.AnalyzeFormalArguments(Ins, CC_MipsO32); - else - CCInfo.AnalyzeFormalArguments(Ins, CC_Mips); + MipsCCInfo.analyzeFormalArguments(Ins); + MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(), + MipsCCInfo.hasByValArg()); Function::const_arg_iterator FuncArg = DAG.getMachineFunction().getFunction()->arg_begin(); - int LastFI = 0;// MipsFI->LastInArgFI is 0 at the entry of this function. 
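
The CopyMips64ByValRegs helper removed above computed its frame-object offset
from the first argument register holding byval data; a hedged standalone
sketch of that arithmetic (N64: eight 8-byte integer argument registers; the
function name is illustrative):

// Offset of the byval frame object from the virtual frame pointer. RegIdx is
// only meaningful when part of the argument arrived in registers.
static int byValFrameObjOffset(bool IsRegLoc, unsigned RegIdx,
                               unsigned LocMemOffset) {
  if (IsRegLoc)
    return (int)(RegIdx * 8) - 8 * 8; // negative: overlays the reg-save slots
  return (int)LocMemOffset;           // argument passed entirely on the stack
}
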
+ unsigned CurArgIdx = 0; + MipsCC::byval_iterator ByValArg = MipsCCInfo.byval_begin(); - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i, ++FuncArg) { + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; + std::advance(FuncArg, Ins[i].OrigArgIndex - CurArgIdx); + CurArgIdx = Ins[i].OrigArgIndex; EVT ValVT = VA.getValVT(); ISD::ArgFlagsTy Flags = Ins[i].Flags; bool IsRegLoc = VA.isRegLoc(); @@ -3012,18 +3070,10 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, if (Flags.isByVal()) { assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); - if (IsO32) { - unsigned NumWords = (Flags.getByValSize() + 3) / 4; - LastFI = MFI->CreateFixedObject(NumWords * 4, VA.getLocMemOffset(), - true); - SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy()); - InVals.push_back(FIN); - ReadByValArg(MF, Chain, dl, OutChains, DAG, NumWords, FIN, VA, Flags, - &*FuncArg); - } else // N32/64 - LastFI = CopyMips64ByValRegs(MF, Chain, dl, OutChains, DAG, VA, Flags, - MFI, IsRegLoc, InVals, MipsFI, - getPointerTy(), &*FuncArg); + assert(ByValArg != MipsCCInfo.byval_end()); + copyByValRegs(Chain, dl, OutChains, DAG, Flags, InVals, &*FuncArg, + MipsCCInfo, *ByValArg); + ++ByValArg; continue; } @@ -3085,13 +3135,13 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, assert(VA.isMemLoc()); // The stack pointer offset is relative to the caller stack frame. - LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, + int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), true); // Create load nodes to retrieve arguments from the stack - SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); InVals.push_back(DAG.getLoad(ValVT, dl, Chain, FIN, - MachinePointerInfo::getFixedStack(LastFI), + MachinePointerInfo::getFixedStack(FI), false, false, false, 0)); } } @@ -3102,55 +3152,16 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) { unsigned Reg = MipsFI->getSRetReturnReg(); if (!Reg) { - Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i32)); + Reg = MF.getRegInfo(). + createVirtualRegister(getRegClassFor(IsN64 ? MVT::i64 : MVT::i32)); MipsFI->setSRetReturnReg(Reg); } SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]); Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); } - if (isVarArg) { - unsigned NumOfRegs = IsO32 ? 4 : 8; - const uint16_t *ArgRegs = IsO32 ? O32IntRegs : Mips64IntRegs; - unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumOfRegs); - int FirstRegSlotOffset = IsO32 ? 0 : -64 ; // offset of $a0's slot. - const TargetRegisterClass *RC = IsO32 ? - (const TargetRegisterClass*)&Mips::CPURegsRegClass : - (const TargetRegisterClass*)&Mips::CPU64RegsRegClass; - unsigned RegSize = RC->getSize(); - int RegSlotOffset = FirstRegSlotOffset + Idx * RegSize; - - // Offset of the first variable argument from stack pointer. - int FirstVaArgOffset; - - if (IsO32 || (Idx == NumOfRegs)) { - FirstVaArgOffset = - (CCInfo.getNextStackOffset() + RegSize - 1) / RegSize * RegSize; - } else - FirstVaArgOffset = RegSlotOffset; - - // Record the frame index of the first variable argument - // which is a value necessary to VASTART. 
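
The vararg block being deleted here chose the offset of the first variable
argument as follows; a standalone sketch of the removed computation (O32 uses
4-byte registers with the save area at offset 0, N32/64 8-byte registers at
negative offsets):

static int firstVarArgOffset(bool IsO32, unsigned Idx, unsigned NumRegs,
                             unsigned RegSize, unsigned NextStackOffset) {
  if (IsO32 || Idx == NumRegs) // no free registers: varargs start on stack
    return (int)((NextStackOffset + RegSize - 1) / RegSize * RegSize);
  // Slot of the first unallocated argument register.
  return (int)(Idx * RegSize) - (int)(NumRegs * RegSize);
}
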
- LastFI = MFI->CreateFixedObject(RegSize, FirstVaArgOffset, true); - MipsFI->setVarArgsFrameIndex(LastFI); - - // Copy the integer registers that have not been used for argument passing - // to the argument register save area. For O32, the save area is allocated - // in the caller's stack frame, while for N32/64, it is allocated in the - // callee's stack frame. - for (int StackOffset = RegSlotOffset; - Idx < NumOfRegs; ++Idx, StackOffset += RegSize) { - unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegs[Idx], RC); - SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, - MVT::getIntegerVT(RegSize * 8)); - LastFI = MFI->CreateFixedObject(RegSize, StackOffset, true); - SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy()); - OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, - MachinePointerInfo(), false, false, 0)); - } - } - - MipsFI->setLastInArgFI(LastFI); + if (isVarArg) + writeVarArgRegs(OutChains, MipsCCInfo, Chain, dl, DAG); // All stores are grouped in one node to allow the matching between // the size of Ins and InVals. This only happens when on varg functions @@ -3167,6 +3178,17 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// +bool +MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv, + MachineFunction &MF, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const { + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), + RVLocs, Context); + return CCInfo.CheckReturn(Outs, RetCC_Mips); +} + SDValue MipsTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -3219,9 +3241,11 @@ MipsTargetLowering::LowerReturn(SDValue Chain, if (!Reg) llvm_unreachable("sret virtual register not created in the entry block"); SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); + unsigned V0 = IsN64 ? Mips::V0_64 : Mips::V0; - Chain = DAG.getCopyToReg(Chain, dl, Mips::V0, Val, Flag); + Chain = DAG.getCopyToReg(Chain, dl, V0, Val, Flag); Flag = Chain.getValue(1); + MF.getRegInfo().addLiveOut(V0); } // Return on Mips is always a "jr $ra" @@ -3325,8 +3349,11 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const case 'd': // Address register. Same as 'r' unless generating MIPS16 code. case 'y': // Same as 'r'. Exists for compatibility. 
case 'r': - if (VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) + if (VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) { + if (Subtarget->inMips16Mode()) + return std::make_pair(0U, &Mips::CPU16RegsRegClass); return std::make_pair(0U, &Mips::CPURegsRegClass); + } if (VT == MVT::i64 && !HasMips64) return std::make_pair(0U, &Mips::CPURegsRegClass); if (VT == MVT::i64 && HasMips64) @@ -3485,3 +3512,316 @@ unsigned MipsTargetLowering::getJumpTableEncoding() const { return TargetLowering::getJumpTableEncoding(); } + +MipsTargetLowering::MipsCC::MipsCC(CallingConv::ID CallConv, bool IsVarArg, + bool IsO32, CCState &Info) : CCInfo(Info) { + UseRegsForByval = true; + + if (IsO32) { + RegSize = 4; + NumIntArgRegs = array_lengthof(O32IntRegs); + ReservedArgArea = 16; + IntArgRegs = ShadowRegs = O32IntRegs; + FixedFn = VarFn = CC_MipsO32; + } else { + RegSize = 8; + NumIntArgRegs = array_lengthof(Mips64IntRegs); + ReservedArgArea = 0; + IntArgRegs = Mips64IntRegs; + ShadowRegs = Mips64DPRegs; + FixedFn = CC_MipsN; + VarFn = CC_MipsN_VarArg; + } + + if (CallConv == CallingConv::Fast) { + assert(!IsVarArg); + UseRegsForByval = false; + ReservedArgArea = 0; + FixedFn = VarFn = CC_Mips_FastCC; + } + + // Pre-allocate reserved argument area. + CCInfo.AllocateStack(ReservedArgArea, 1); +} + +void MipsTargetLowering::MipsCC:: +analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args) { + unsigned NumOpnds = Args.size(); + + for (unsigned I = 0; I != NumOpnds; ++I) { + MVT ArgVT = Args[I].VT; + ISD::ArgFlagsTy ArgFlags = Args[I].Flags; + bool R; + + if (ArgFlags.isByVal()) { + handleByValArg(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags); + continue; + } + + if (Args[I].IsFixed) + R = FixedFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); + else + R = VarFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); + + if (R) { +#ifndef NDEBUG + dbgs() << "Call operand #" << I << " has unhandled type " + << EVT(ArgVT).getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +void MipsTargetLowering::MipsCC:: +analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args) { + unsigned NumArgs = Args.size(); + + for (unsigned I = 0; I != NumArgs; ++I) { + MVT ArgVT = Args[I].VT; + ISD::ArgFlagsTy ArgFlags = Args[I].Flags; + + if (ArgFlags.isByVal()) { + handleByValArg(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags); + continue; + } + + if (!FixedFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo)) + continue; + +#ifndef NDEBUG + dbgs() << "Formal Arg #" << I << " has unhandled type " + << EVT(ArgVT).getEVTString(); +#endif + llvm_unreachable(0); + } +} + +void +MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT, + MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags) { + assert(ArgFlags.getByValSize() && "Byval argument's size shouldn't be 0."); + + struct ByValArgInfo ByVal; + unsigned ByValSize = RoundUpToAlignment(ArgFlags.getByValSize(), RegSize); + unsigned Align = std::min(std::max(ArgFlags.getByValAlign(), RegSize), + RegSize * 2); + + if (UseRegsForByval) + allocateRegs(ByVal, ByValSize, Align); + + // Allocate space on caller's stack. 
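
handleByValArg's size and alignment normalization above reduces to two small
helpers; a minimal sketch (assumes RegSize is 4 or 8, as in this file):

#include <algorithm>

static unsigned roundByValSize(unsigned Size, unsigned RegSize) {
  return (Size + RegSize - 1) / RegSize * RegSize; // multiple of RegSize
}
static unsigned clampByValAlign(unsigned Align, unsigned RegSize) {
  // Alignment is forced into [RegSize, 2 * RegSize]: smaller alignments are
  // promoted, larger ones cannot be honored in the argument registers.
  return std::min(std::max(Align, RegSize), RegSize * 2);
}
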
+ ByVal.Address = CCInfo.AllocateStack(ByValSize - RegSize * ByVal.NumRegs, + Align); + CCInfo.addLoc(CCValAssign::getMem(ValNo, ValVT, ByVal.Address, LocVT, + LocInfo)); + ByValArgs.push_back(ByVal); +} + +void MipsTargetLowering::MipsCC::allocateRegs(ByValArgInfo &ByVal, + unsigned ByValSize, + unsigned Align) { + assert(!(ByValSize % RegSize) && !(Align % RegSize) && + "Byval argument's size and alignment should be a multiple of" + "RegSize."); + + ByVal.FirstIdx = CCInfo.getFirstUnallocated(IntArgRegs, NumIntArgRegs); + + // If Align > RegSize, the first arg register must be even. + if ((Align > RegSize) && (ByVal.FirstIdx % 2)) { + CCInfo.AllocateReg(IntArgRegs[ByVal.FirstIdx], ShadowRegs[ByVal.FirstIdx]); + ++ByVal.FirstIdx; + } + + // Mark the registers allocated. + for (unsigned I = ByVal.FirstIdx; ByValSize && (I < NumIntArgRegs); + ByValSize -= RegSize, ++I, ++ByVal.NumRegs) + CCInfo.AllocateReg(IntArgRegs[I], ShadowRegs[I]); +} + +void MipsTargetLowering:: +copyByValRegs(SDValue Chain, DebugLoc DL, std::vector<SDValue> &OutChains, + SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags, + SmallVectorImpl<SDValue> &InVals, const Argument *FuncArg, + const MipsCC &CC, const ByValArgInfo &ByVal) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned RegAreaSize = ByVal.NumRegs * CC.regSize(); + unsigned FrameObjSize = std::max(Flags.getByValSize(), RegAreaSize); + int FrameObjOffset; + + if (RegAreaSize) + FrameObjOffset = (int)CC.reservedArgArea() - + (int)((CC.numIntArgRegs() - ByVal.FirstIdx) * CC.regSize()); + else + FrameObjOffset = ByVal.Address; + + // Create frame object. + EVT PtrTy = getPointerTy(); + int FI = MFI->CreateFixedObject(FrameObjSize, FrameObjOffset, true); + SDValue FIN = DAG.getFrameIndex(FI, PtrTy); + InVals.push_back(FIN); + + if (!ByVal.NumRegs) + return; + + // Copy arg registers. + EVT RegTy = MVT::getIntegerVT(CC.regSize() * 8); + const TargetRegisterClass *RC = getRegClassFor(RegTy); + + for (unsigned I = 0; I < ByVal.NumRegs; ++I) { + unsigned ArgReg = CC.intArgRegs()[ByVal.FirstIdx + I]; + unsigned VReg = AddLiveIn(MF, ArgReg, RC); + unsigned Offset = I * CC.regSize(); + SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrTy, FIN, + DAG.getConstant(Offset, PtrTy)); + SDValue Store = DAG.getStore(Chain, DL, DAG.getRegister(VReg, RegTy), + StorePtr, MachinePointerInfo(FuncArg, Offset), + false, false, 0); + OutChains.push_back(Store); + } +} + +// Copy byVal arg to registers and stack. +void MipsTargetLowering:: +passByValArg(SDValue Chain, DebugLoc DL, + SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, + SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr, + MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, + const MipsCC &CC, const ByValArgInfo &ByVal, + const ISD::ArgFlagsTy &Flags, bool isLittle) const { + unsigned ByValSize = Flags.getByValSize(); + unsigned Offset = 0; // Offset in # of bytes from the beginning of struct. + unsigned RegSize = CC.regSize(); + unsigned Alignment = std::min(Flags.getByValAlign(), RegSize); + EVT PtrTy = getPointerTy(), RegTy = MVT::getIntegerVT(RegSize * 8); + + if (ByVal.NumRegs) { + const uint16_t *ArgRegs = CC.intArgRegs(); + bool LeftoverBytes = (ByVal.NumRegs * RegSize > ByValSize); + unsigned I = 0; + + // Copy words to registers. 
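
A standalone sketch of allocateRegs' even-register rule above (the skipped
register is still marked allocated; it just carries no byval data):

static unsigned firstByValReg(unsigned FirstFree, unsigned Align,
                              unsigned RegSize) {
  // A byval argument aligned to 2 * RegSize must start in an even-numbered
  // argument register, so an odd first free index costs one register.
  if (Align > RegSize && (FirstFree % 2))
    ++FirstFree;
  return FirstFree;
}
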
+ for (; I < ByVal.NumRegs - LeftoverBytes; ++I, Offset += RegSize) { + SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, + DAG.getConstant(Offset, PtrTy)); + SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr, + MachinePointerInfo(), false, false, false, + Alignment); + MemOpChains.push_back(LoadVal.getValue(1)); + unsigned ArgReg = ArgRegs[ByVal.FirstIdx + I]; + RegsToPass.push_back(std::make_pair(ArgReg, LoadVal)); + } + + // Return if the struct has been fully copied. + if (ByValSize == Offset) + return; + + // Copy the remainder of the byval argument with sub-word loads and shifts. + if (LeftoverBytes) { + assert((ByValSize > Offset) && (ByValSize < Offset + RegSize) && + "Size of the remainder should be smaller than RegSize."); + SDValue Val; + + for (unsigned LoadSize = RegSize / 2, TotalSizeLoaded = 0; + Offset < ByValSize; LoadSize /= 2) { + unsigned RemSize = ByValSize - Offset; + + if (RemSize < LoadSize) + continue; + + // Load subword. + SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, + DAG.getConstant(Offset, PtrTy)); + SDValue LoadVal = + DAG.getExtLoad(ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, + MachinePointerInfo(), MVT::getIntegerVT(LoadSize * 8), + false, false, Alignment); + MemOpChains.push_back(LoadVal.getValue(1)); + + // Shift the loaded value. + unsigned Shamt; + + if (isLittle) + Shamt = TotalSizeLoaded; + else + Shamt = (RegSize - (TotalSizeLoaded + LoadSize)) * 8; + + SDValue Shift = DAG.getNode(ISD::SHL, DL, RegTy, LoadVal, + DAG.getConstant(Shamt, MVT::i32)); + + if (Val.getNode()) + Val = DAG.getNode(ISD::OR, DL, RegTy, Val, Shift); + else + Val = Shift; + + Offset += LoadSize; + TotalSizeLoaded += LoadSize; + Alignment = std::min(Alignment, LoadSize); + } + + unsigned ArgReg = ArgRegs[ByVal.FirstIdx + I]; + RegsToPass.push_back(std::make_pair(ArgReg, Val)); + return; + } + } + + // Copy remainder of byval arg to it with memcpy. + unsigned MemCpySize = ByValSize - Offset; + SDValue Src = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, + DAG.getConstant(Offset, PtrTy)); + SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr, + DAG.getIntPtrConstant(ByVal.Address)); + Chain = DAG.getMemcpy(Chain, DL, Dst, Src, + DAG.getConstant(MemCpySize, PtrTy), Alignment, + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); + MemOpChains.push_back(Chain); +} + +void +MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains, + const MipsCC &CC, SDValue Chain, + DebugLoc DL, SelectionDAG &DAG) const { + unsigned NumRegs = CC.numIntArgRegs(); + const uint16_t *ArgRegs = CC.intArgRegs(); + const CCState &CCInfo = CC.getCCInfo(); + unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumRegs); + unsigned RegSize = CC.regSize(); + EVT RegTy = MVT::getIntegerVT(RegSize * 8); + const TargetRegisterClass *RC = getRegClassFor(RegTy); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + // Offset of the first variable argument from stack pointer. + int VaArgOffset; + + if (NumRegs == Idx) + VaArgOffset = RoundUpToAlignment(CCInfo.getNextStackOffset(), RegSize); + else + VaArgOffset = + (int)CC.reservedArgArea() - (int)(RegSize * (NumRegs - Idx)); + + // Record the frame index of the first variable argument + // which is a value necessary to VASTART. 
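
The sub-word loop in passByValArg above packs the tail of a byval struct into
a single register. A hedged host-side model of the little-endian case only
(big-endian shift amounts differ, as the code shows; preconditions: RemSize <
RegSize and RegSize a power of two):

#include <cstdint>

static uint64_t packRemainderLE(const uint8_t *Bytes, unsigned RemSize,
                                unsigned RegSize) {
  uint64_t Val = 0;
  unsigned Offset = 0;
  for (unsigned LoadSize = RegSize / 2; Offset < RemSize; LoadSize /= 2) {
    if (RemSize - Offset < LoadSize)
      continue; // this width does not fit; try the next smaller one
    uint64_t Piece = 0; // zero-extending little-endian load of LoadSize bytes
    for (unsigned B = 0; B < LoadSize; ++B)
      Piece |= (uint64_t)Bytes[Offset + B] << (8 * B);
    Val |= Piece << (Offset * 8); // little-endian shift amount
    Offset += LoadSize;
  }
  return Val;
}
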
+ int FI = MFI->CreateFixedObject(RegSize, VaArgOffset, true); + MipsFI->setVarArgsFrameIndex(FI); + + // Copy the integer registers that have not been used for argument passing + // to the argument register save area. For O32, the save area is allocated + // in the caller's stack frame, while for N32/64, it is allocated in the + // callee's stack frame. + for (unsigned I = Idx; I < NumRegs; ++I, VaArgOffset += RegSize) { + unsigned Reg = AddLiveIn(MF, ArgRegs[I], RC); + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy); + FI = MFI->CreateFixedObject(RegSize, VaArgOffset, true); + SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); + SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, + MachinePointerInfo(), false, false, 0); + cast<StoreSDNode>(Store.getNode())->getMemOperand()->setValue(0); + OutChains.push_back(Store); + } +} diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h index 95ea8fa..43f97e8 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h @@ -17,6 +17,7 @@ #include "Mips.h" #include "MipsSubtarget.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" @@ -29,6 +30,9 @@ namespace llvm { // Jump and link (call) JmpLink, + // Tail call + TailCall, + // Get the Higher 16 bits from a 32-bit immediate // No relation with Mips Hi register Hi, @@ -81,6 +85,47 @@ namespace llvm { Ext, Ins, + // EXTR.W instrinsic nodes. + EXTP, + EXTPDP, + EXTR_S_H, + EXTR_W, + EXTR_R_W, + EXTR_RS_W, + SHILO, + MTHLIP, + + // DPA.W intrinsic nodes. + MULSAQ_S_W_PH, + MAQ_S_W_PHL, + MAQ_S_W_PHR, + MAQ_SA_W_PHL, + MAQ_SA_W_PHR, + DPAU_H_QBL, + DPAU_H_QBR, + DPSU_H_QBL, + DPSU_H_QBR, + DPAQ_S_W_PH, + DPSQ_S_W_PH, + DPAQ_SA_L_W, + DPSQ_SA_L_W, + DPA_W_PH, + DPS_W_PH, + DPAQX_S_W_PH, + DPAQX_SA_W_PH, + DPAX_W_PH, + DPSX_W_PH, + DPSQX_S_W_PH, + DPSQX_SA_W_PH, + MULSA_W_PH, + + MULT, + MULTU, + MADD_DSP, + MADDU_DSP, + MSUB_DSP, + MSUBU_DSP, + // Load/Store Left/Right nodes. LWL = ISD::FIRST_TARGET_MEMORY_OPCODE, LWR, @@ -96,6 +141,7 @@ namespace llvm { //===--------------------------------------------------------------------===// // TargetLowering Implementation //===--------------------------------------------------------------------===// + class MipsFunctionInfo; class MipsTargetLowering : public TargetLowering { public: @@ -105,9 +151,19 @@ namespace llvm { virtual bool allowsUnalignedMemoryAccesses (EVT VT) const; + virtual void LowerOperationWrapper(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const; + /// LowerOperation - Provide custom lowering hooks for some operations. virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + /// ReplaceNodeResults - Replace the results of node with an illegal result + /// type with new values built out of custom code. + /// + virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, + SelectionDAG &DAG) const; + /// getTargetNodeName - This method returns the name of a target specific // DAG node. virtual const char *getTargetNodeName(unsigned Opcode) const; @@ -117,6 +173,69 @@ namespace llvm { virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; private: + + /// ByValArgInfo - Byval argument information. + struct ByValArgInfo { + unsigned FirstIdx; // Index of the first register used. + unsigned NumRegs; // Number of registers used for this argument. 
+ unsigned Address; // Offset of the stack area used to pass this argument. + + ByValArgInfo() : FirstIdx(0), NumRegs(0), Address(0) {} + }; + + /// MipsCC - This class provides methods used to analyze formal and call + /// arguments and inquire about calling convention information. + class MipsCC { + public: + MipsCC(CallingConv::ID CallConv, bool IsVarArg, bool IsO32, + CCState &Info); + + void analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs); + void analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins); + void handleByValArg(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags); + + const CCState &getCCInfo() const { return CCInfo; } + + /// hasByValArg - Returns true if function has byval arguments. + bool hasByValArg() const { return !ByValArgs.empty(); } + + /// useRegsForByval - Returns true if the calling convention allows the + /// use of registers to pass byval arguments. + bool useRegsForByval() const { return UseRegsForByval; } + + /// regSize - Size (in number of bits) of integer registers. + unsigned regSize() const { return RegSize; } + + /// numIntArgRegs - Number of integer registers available for calls. + unsigned numIntArgRegs() const { return NumIntArgRegs; } + + /// reservedArgArea - The size of the area the caller reserves for + /// register arguments. This is 16-byte if ABI is O32. + unsigned reservedArgArea() const { return ReservedArgArea; } + + /// intArgRegs - Pointer to array of integer registers. + const uint16_t *intArgRegs() const { return IntArgRegs; } + + typedef SmallVector<ByValArgInfo, 2>::const_iterator byval_iterator; + byval_iterator byval_begin() const { return ByValArgs.begin(); } + byval_iterator byval_end() const { return ByValArgs.end(); } + + private: + void allocateRegs(ByValArgInfo &ByVal, unsigned ByValSize, + unsigned Align); + + CCState &CCInfo; + bool UseRegsForByval; + unsigned RegSize; + unsigned NumIntArgRegs; + unsigned ReservedArgArea; + const uint16_t *IntArgRegs, *ShadowRegs; + SmallVector<ByValArgInfo, 2> ByValArgs; + llvm::CCAssignFn *FixedFn, *VarFn; + }; + // Subtarget Info const MipsSubtarget *Subtarget; @@ -151,6 +270,39 @@ namespace llvm { bool IsSRA) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const; + + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. + bool IsEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const; + + /// copyByValArg - Copy argument registers which were used to pass a byval + /// argument to the stack. Create a stack frame object for the byval + /// argument. + void copyByValRegs(SDValue Chain, DebugLoc DL, + std::vector<SDValue> &OutChains, SelectionDAG &DAG, + const ISD::ArgFlagsTy &Flags, + SmallVectorImpl<SDValue> &InVals, + const Argument *FuncArg, + const MipsCC &CC, const ByValArgInfo &ByVal) const; + + /// passByValArg - Pass a byval argument in registers or on stack. 
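
For comparison with the vararg code deleted from LowerFormalArguments,
writeVarArgRegs (defined earlier in MipsISelLowering.cpp) derives the
save-area offset from the MipsCC queries; a standalone sketch:

static int varArgAreaOffset(unsigned NumRegs, unsigned Idx, unsigned RegSize,
                            unsigned ReservedArgArea,
                            unsigned NextStackOffset) {
  if (NumRegs == Idx) // every argument register used: varargs live on stack
    return (int)((NextStackOffset + RegSize - 1) / RegSize * RegSize);
  // Otherwise start at the slot of the first free register.
  return (int)ReservedArgArea - (int)(RegSize * (NumRegs - Idx));
}

For O32 this yields 16 - 4 * (4 - Idx) == 4 * Idx, the same register-slot
offset the deleted code computed directly.
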
+ void passByValArg(SDValue Chain, DebugLoc DL, + SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, + SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr, + MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, + const MipsCC &CC, const ByValArgInfo &ByVal, + const ISD::ArgFlagsTy &Flags, bool isLittle) const; + + /// writeVarArgRegs - Write variable function arguments passed in registers + /// to the stack. Also create a stack frame object for the first variable + /// argument. + void writeVarArgRegs(std::vector<SDValue> &OutChains, const MipsCC &CC, + SDValue Chain, DebugLoc DL, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, @@ -159,10 +311,20 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + SDValue passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain, + SDValue Arg, DebugLoc DL, bool IsTailCall, + SelectionDAG &DAG) const; + virtual SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const; + virtual bool + CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const; + virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -209,6 +371,8 @@ namespace llvm { virtual unsigned getJumpTableEncoding() const; + MachineBasicBlock *EmitBPOSGE32(MachineInstr *MI, + MachineBasicBlock *BB) const; MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, bool Nand = false) const; MachineBasicBlock *EmitAtomicBinaryPartword(MachineInstr *MI, diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td index 3e78c45..33ee020 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td +++ b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td @@ -90,20 +90,20 @@ def fpimm0neg : PatLeaf<(fpimm), [{ let DecoderMethod = "DecodeFMem" in { class FPLoad<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>: FMem<op, (outs RC:$ft), (ins MemOpnd:$addr), - !strconcat(opstr, "\t$ft, $addr"), [(set RC:$ft, (load_a addr:$addr))], + !strconcat(opstr, "\t$ft, $addr"), [(set RC:$ft, (load addr:$addr))], IILoad>; // FP store. class FPStore<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>: FMem<op, (outs), (ins RC:$ft, MemOpnd:$addr), - !strconcat(opstr, "\t$ft, $addr"), [(store_a RC:$ft, addr:$addr)], + !strconcat(opstr, "\t$ft, $addr"), [(store RC:$ft, addr:$addr)], IIStore>; } // FP indexed load. 
class FPIdxLoad<bits<6> funct, string opstr, RegisterClass DRC, RegisterClass PRC, SDPatternOperator FOp = null_frag>: FFMemIdx<funct, (outs DRC:$fd), (ins PRC:$base, PRC:$index), - !strconcat(opstr, "\t$fd, $index($base)"), + !strconcat(opstr, "\t$fd, ${index}(${base})"), [(set DRC:$fd, (FOp (add PRC:$base, PRC:$index)))]> { let fs = 0; } @@ -112,7 +112,7 @@ class FPIdxLoad<bits<6> funct, string opstr, RegisterClass DRC, class FPIdxStore<bits<6> funct, string opstr, RegisterClass DRC, RegisterClass PRC, SDPatternOperator FOp= null_frag>: FFMemIdx<funct, (outs), (ins DRC:$fs, PRC:$base, PRC:$index), - !strconcat(opstr, "\t$fs, $index($base)"), + !strconcat(opstr, "\t$fs, ${index}(${base})"), [(FOp DRC:$fs, (add PRC:$base, PRC:$index))]> { let fd = 0; } @@ -182,20 +182,21 @@ defm CEIL_W : FFR1_W_M<0xe, "ceil">; defm CEIL_L : FFR1_L_M<0xa, "ceil">; defm FLOOR_W : FFR1_W_M<0xf, "floor">; defm FLOOR_L : FFR1_L_M<0xb, "floor">; -defm CVT_W : FFR1_W_M<0x24, "cvt">; +defm CVT_W : FFR1_W_M<0x24, "cvt">, NeverHasSideEffects; //defm CVT_L : FFR1_L_M<0x25, "cvt">; -def CVT_S_W : FFR1<0x20, 20, "cvt", "s.w", FGR32, FGR32>; -def CVT_L_S : FFR1<0x25, 16, "cvt", "l.s", FGR64, FGR32>; -def CVT_L_D64: FFR1<0x25, 17, "cvt", "l.d", FGR64, FGR64>; +def CVT_S_W : FFR1<0x20, 20, "cvt", "s.w", FGR32, FGR32>, NeverHasSideEffects; +def CVT_L_S : FFR1<0x25, 16, "cvt", "l.s", FGR64, FGR32>, NeverHasSideEffects; +def CVT_L_D64: FFR1<0x25, 17, "cvt", "l.d", FGR64, FGR64>, NeverHasSideEffects; -let Predicates = [NotFP64bit, HasStandardEncoding] in { +let Predicates = [NotFP64bit, HasStandardEncoding], neverHasSideEffects = 1 in { def CVT_S_D32 : FFR1<0x20, 17, "cvt", "s.d", FGR32, AFGR64>; def CVT_D32_W : FFR1<0x21, 20, "cvt", "d.w", AFGR64, FGR32>; def CVT_D32_S : FFR1<0x21, 16, "cvt", "d.s", AFGR64, FGR32>; } -let Predicates = [IsFP64bit, HasStandardEncoding], DecoderNamespace = "Mips64" in { +let Predicates = [IsFP64bit, HasStandardEncoding], DecoderNamespace = "Mips64", + neverHasSideEffects = 1 in { def CVT_S_D64 : FFR1<0x20, 17, "cvt", "s.d", FGR32, FGR64>; def CVT_S_L : FFR1<0x20, 21, "cvt", "s.l", FGR32, FGR64>; def CVT_D64_W : FFR1<0x21, 20, "cvt", "d.w", FGR64, FGR32>; @@ -282,26 +283,26 @@ let Predicates = [NotN64, NotMips64, HasStandardEncoding] in { // Indexed loads and stores. 
let Predicates = [HasMips32r2Or64, HasStandardEncoding] in { - def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load_a>; - def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store_a>; + def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load>; + def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store>; } let Predicates = [HasMips32r2, NotMips64, HasStandardEncoding] in { - def LDXC1 : FPIdxLoad<0x1, "ldxc1", AFGR64, CPURegs, load_a>; - def SDXC1 : FPIdxStore<0x9, "sdxc1", AFGR64, CPURegs, store_a>; + def LDXC1 : FPIdxLoad<0x1, "ldxc1", AFGR64, CPURegs, load>; + def SDXC1 : FPIdxStore<0x9, "sdxc1", AFGR64, CPURegs, store>; } let Predicates = [HasMips64, NotN64, HasStandardEncoding], DecoderNamespace="Mips64" in { - def LDXC164 : FPIdxLoad<0x1, "ldxc1", FGR64, CPURegs, load_a>; - def SDXC164 : FPIdxStore<0x9, "sdxc1", FGR64, CPURegs, store_a>; + def LDXC164 : FPIdxLoad<0x1, "ldxc1", FGR64, CPURegs, load>; + def SDXC164 : FPIdxStore<0x9, "sdxc1", FGR64, CPURegs, store>; } // n64 let Predicates = [IsN64, HasStandardEncoding], isCodeGenOnly=1 in { - def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load_a>; - def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load_a>; - def SWXC1_P8 : FPIdxStore<0x8, "swxc1", FGR32, CPU64Regs, store_a>; - def SDXC164_P8 : FPIdxStore<0x9, "sdxc1", FGR64, CPU64Regs, store_a>; + def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load>; + def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load>; + def SWXC1_P8 : FPIdxStore<0x8, "swxc1", FGR32, CPU64Regs, store>; + def SDXC164_P8 : FPIdxStore<0x9, "sdxc1", FGR64, CPU64Regs, store>; } // Load/store doubleword indexed unaligned. diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td index 8feb853..1ecbdc2 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td +++ b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td @@ -92,6 +92,14 @@ class PseudoSE<dag outs, dag ins, string asmstr, list<dag> pattern>: let Predicates = [HasStandardEncoding]; } +// Pseudo-instructions for alternate assembly syntax (never used by codegen). +// These are aliases that require C++ handling to convert to the target +// instruction, while InstAliases can be handled directly by tblgen. 
+class MipsAsmPseudoInst<dag outs, dag ins, string asmstr>:
+ MipsInst<outs, ins, asmstr, [], IIPseudo, Pseudo> {
+ let isPseudo = 1;
+ let Pattern = [];
+}
//===----------------------------------------------------------------------===//
// Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|>
//===----------------------------------------------------------------------===//
@@ -163,6 +171,27 @@ class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
 let Inst{25-0} = addr;
 }
+ //===----------------------------------------------------------------------===//
+// MFC instruction class in Mips : <|op|mf|rt|rd|0000000|sel|>
+//===----------------------------------------------------------------------===//
+class MFC3OP<bits<6> op, bits<5> _mfmt, dag outs, dag ins, string asmstr>:
+ InstSE<outs, ins, asmstr, [], NoItinerary, FrmFR>
+{
+ bits<5> mfmt;
+ bits<5> rt;
+ bits<5> rd;
+ bits<3> sel;
+
+ let Opcode = op;
+ let mfmt = _mfmt;
+
+ let Inst{25-21} = mfmt;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-3} = 0;
+ let Inst{2-0} = sel;
+}
+
//===----------------------------------------------------------------------===//
//
// FLOATING POINT INSTRUCTION FORMATS
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
index 50e3eb5..ca80d43 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -95,6 +95,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
 SmallVectorImpl<MachineOperand> &Cond,
 bool AllowModify) const {
+
 MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
 // Skip all the debug instructions.
@@ -177,9 +178,14 @@ void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB,
 const MCInstrDesc &MCID = get(Opc);
 MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID);
- for (unsigned i = 1; i < Cond.size(); ++i)
- MIB.addReg(Cond[i].getReg());
-
+ for (unsigned i = 1; i < Cond.size(); ++i) {
+ if (Cond[i].isReg())
+ MIB.addReg(Cond[i].getReg());
+ else if (Cond[i].isImm())
+ MIB.addImm(Cond[i].getImm());
+ else
+ assert(false && "Cannot copy operand");
+ }
 MIB.addMBB(TBB);
 }
@@ -262,46 +268,3 @@ unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
 }
 }
}
-
-unsigned
-llvm::Mips::loadImmediate(int64_t Imm, bool IsN64, const TargetInstrInfo &TII,
- MachineBasicBlock& MBB,
- MachineBasicBlock::iterator II, DebugLoc DL,
- bool LastInstrIsADDiu,
- MipsAnalyzeImmediate::Inst *LastInst) {
- MipsAnalyzeImmediate AnalyzeImm;
- unsigned Size = IsN64 ? 64 : 32;
- unsigned LUi = IsN64 ? Mips::LUi64 : Mips::LUi;
- unsigned ZEROReg = IsN64 ? Mips::ZERO_64 : Mips::ZERO;
- unsigned ATReg = IsN64 ? Mips::AT_64 : Mips::AT;
-
- const MipsAnalyzeImmediate::InstSeq &Seq =
- AnalyzeImm.Analyze(Imm, Size, LastInstrIsADDiu);
- MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
-
- if (LastInst && (Seq.size() == 1)) {
- *LastInst = *Inst;
- return 0;
- }
-
- // The first instruction can be a LUi, which is different from other
- // instructions (ADDiu, ORI and SLL) in that it does not have a register
- // operand.
- if (Inst->Opc == LUi)
- BuildMI(MBB, II, DL, TII.get(LUi), ATReg)
- .addImm(SignExtend64<16>(Inst->ImmOpnd));
- else
- BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg)
- .addImm(SignExtend64<16>(Inst->ImmOpnd));
-
- // Build the remaining instructions in Seq. Skip the last instruction if
- // LastInst is not 0.
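
The loadImmediate helper being deleted here drove MipsAnalyzeImmediate. Its
most common 32-bit case reduces to a lui/ori pair; a hedged sketch of just
that case (not the full analyzer, which also emits ADDiu and SLL):

#include <cstdint>
#include <cstdio>

// Print an instruction pair that materializes a 32-bit immediate into $at.
static void materialize32(uint32_t Imm) {
  unsigned Hi = Imm >> 16, Lo = Imm & 0xffff;
  if (Hi)
    std::printf("lui $at, %u\n", Hi);
  if (Lo || !Hi)
    std::printf("ori $at, %s, %u\n", Hi ? "$at" : "$zero", Lo);
}
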
- for (++Inst; Inst != Seq.end() - !!LastInst; ++Inst) - BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - - if (LastInst) - *LastInst = *Inst; - - return Seq.size() - !!LastInst; -} diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h index 7d56259..aca2bc7 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h +++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h @@ -88,18 +88,6 @@ private: const SmallVectorImpl<MachineOperand>& Cond) const; }; -namespace Mips { - /// Emit a series of instructions to load an immediate. All instructions - /// except for the last one are emitted. The function returns the number of - /// MachineInstrs generated. The opcode-immediate pair of the last - /// instruction is returned in LastInst, if it is not 0. - unsigned - loadImmediate(int64_t Imm, bool IsN64, const TargetInstrInfo &TII, - MachineBasicBlock& MBB, MachineBasicBlock::iterator II, - DebugLoc DL, bool LastInstrIsADDiu, - MipsAnalyzeImmediate::Inst *LastInst); -} - /// Create MipsInstrInfo objects. const MipsInstrInfo *createMips16InstrInfo(MipsTargetMachine &TM); const MipsInstrInfo *createMipsSEInstrInfo(MipsTargetMachine &TM); diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td index da15d4d..f16b5f9 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td @@ -52,6 +52,10 @@ def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink, [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; +// Tail call +def MipsTailCall : SDNode<"MipsISD::TailCall", SDT_MipsJmpLink, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + // Hi and Lo nodes are used to handle global addresses. Used on // MipsISelLowering to lower stuff like GlobalAddress, ExternalSymbol // static model. (nothing to do with Mips Registers Hi and Lo) @@ -74,9 +78,10 @@ def MipsRet : SDNode<"MipsISD::Ret", SDTNone, [SDNPHasChain, SDNPOptInGlue]>; // These are target-independent nodes, but have target-specific formats. 
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart, - [SDNPHasChain, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, + SDNPOptInGlue, SDNPOutGlue]>; // MAdd*/MSub* nodes def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub, @@ -110,7 +115,7 @@ def MipsWrapper : SDNode<"MipsISD::Wrapper", SDTIntBinOp>; def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc, [SDNPHasChain, SDNPInGlue]>; -def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain]>; +def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain,SDNPSideEffect]>; def MipsExt : SDNode<"MipsISD::Ext", SDT_Ext>; def MipsIns : SDNode<"MipsISD::Ins", SDT_Ins>; @@ -174,6 +179,35 @@ class MipsPat<dag pattern, dag result> : Pat<pattern, result> { let Predicates = [HasStandardEncoding]; } +class IsBranch { + bit isBranch = 1; +} + +class IsReturn { + bit isReturn = 1; +} + +class IsCall { + bit isCall = 1; +} + +class IsTailCall { + bit isCall = 1; + bit isTerminator = 1; + bit isReturn = 1; + bit isBarrier = 1; + bit hasExtraSrcRegAllocReq = 1; + bit isCodeGenOnly = 1; +} + +class IsAsCheapAsAMove { + bit isAsCheapAsAMove = 1; +} + +class NeverHasSideEffects { + bit neverHasSideEffects = 1; +} + //===----------------------------------------------------------------------===// // Instruction format superclass //===----------------------------------------------------------------------===// @@ -208,17 +242,24 @@ def uimm16 : Operand<i32> { let PrintMethod = "printUnsignedImm"; } +def MipsMemAsmOperand : AsmOperandClass { + let Name = "Mem"; + let ParserMethod = "parseMemOperand"; +} + // Address operand def mem : Operand<i32> { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops CPURegs, simm16); let EncoderMethod = "getMemEncoding"; + let ParserMatchClass = MipsMemAsmOperand; } def mem64 : Operand<i64> { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops CPU64Regs, simm16_64); let EncoderMethod = "getMemEncoding"; + let ParserMatchClass = MipsMemAsmOperand; } def mem_ea : Operand<i32> { @@ -285,57 +326,25 @@ def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], [SDNPWantParent]>; //===----------------------------------------------------------------------===// -// Pattern fragment for load/store +// Instructions specific format //===----------------------------------------------------------------------===// -class UnalignedLoad<PatFrag Node> : - PatFrag<(ops node:$ptr), (Node node:$ptr), [{ - LoadSDNode *LD = cast<LoadSDNode>(N); - return LD->getMemoryVT().getSizeInBits()/8 > LD->getAlignment(); -}]>; -class AlignedLoad<PatFrag Node> : - PatFrag<(ops node:$ptr), (Node node:$ptr), [{ - LoadSDNode *LD = cast<LoadSDNode>(N); - return LD->getMemoryVT().getSizeInBits()/8 <= LD->getAlignment(); -}]>; - -class UnalignedStore<PatFrag Node> : - PatFrag<(ops node:$val, node:$ptr), (Node node:$val, node:$ptr), [{ - StoreSDNode *SD = cast<StoreSDNode>(N); - return SD->getMemoryVT().getSizeInBits()/8 > SD->getAlignment(); -}]>; +/// Move Control Registers From/To CPU Registers +def MFC0_3OP : MFC3OP<0x10, 0, (outs CPURegs:$rt), + (ins CPURegs:$rd, uimm16:$sel),"mfc0\t$rt, $rd, $sel">; +def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP CPURegs:$rt, CPURegs:$rd, 0)>; -class AlignedStore<PatFrag Node> : - PatFrag<(ops node:$val, node:$ptr), (Node node:$val, node:$ptr), [{ - StoreSDNode *SD = cast<StoreSDNode>(N); - return 
SD->getMemoryVT().getSizeInBits()/8 <= SD->getAlignment(); -}]>; +def MTC0_3OP : MFC3OP<0x10, 4, (outs CPURegs:$rd, uimm16:$sel), + (ins CPURegs:$rt),"mtc0\t$rt, $rd, $sel">; +def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP CPURegs:$rd, 0, CPURegs:$rt)>; -// Load/Store PatFrags. -def sextloadi16_a : AlignedLoad<sextloadi16>; -def zextloadi16_a : AlignedLoad<zextloadi16>; -def extloadi16_a : AlignedLoad<extloadi16>; -def load_a : AlignedLoad<load>; -def sextloadi32_a : AlignedLoad<sextloadi32>; -def zextloadi32_a : AlignedLoad<zextloadi32>; -def extloadi32_a : AlignedLoad<extloadi32>; -def truncstorei16_a : AlignedStore<truncstorei16>; -def store_a : AlignedStore<store>; -def truncstorei32_a : AlignedStore<truncstorei32>; -def sextloadi16_u : UnalignedLoad<sextloadi16>; -def zextloadi16_u : UnalignedLoad<zextloadi16>; -def extloadi16_u : UnalignedLoad<extloadi16>; -def load_u : UnalignedLoad<load>; -def sextloadi32_u : UnalignedLoad<sextloadi32>; -def zextloadi32_u : UnalignedLoad<zextloadi32>; -def extloadi32_u : UnalignedLoad<extloadi32>; -def truncstorei16_u : UnalignedStore<truncstorei16>; -def store_u : UnalignedStore<store>; -def truncstorei32_u : UnalignedStore<truncstorei32>; +def MFC2_3OP : MFC3OP<0x12, 0, (outs CPURegs:$rt), + (ins CPURegs:$rd, uimm16:$sel),"mfc2\t$rt, $rd, $sel">; +def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP CPURegs:$rt, CPURegs:$rd, 0)>; -//===----------------------------------------------------------------------===// -// Instructions specific format -//===----------------------------------------------------------------------===// +def MTC2_3OP : MFC3OP<0x12, 4, (outs CPURegs:$rd, uimm16:$sel), + (ins CPURegs:$rt),"mtc2\t$rt, $rd, $sel">; +def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP CPURegs:$rd, 0, CPURegs:$rt)>; // Arithmetic and logical instructions with 3 register operands. class ArithLogicR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode, @@ -416,7 +425,7 @@ class shift_rotate_reg<bits<6> func, bits<5> isRotate, string instr_asm, // Load Upper Imediate class LoadUpper<bits<6> op, string instr_asm, RegisterClass RC, Operand Imm>: FI<op, (outs RC:$rt), (ins Imm:$imm16), - !strconcat(instr_asm, "\t$rt, $imm16"), [], IIAlu> { + !strconcat(instr_asm, "\t$rt, $imm16"), [], IIAlu>, IsAsCheapAsAMove { let rs = 0; let neverHasSideEffects = 1; let isReMaterializable = 1; @@ -597,14 +606,13 @@ class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od, IIAlu>; // Jump -class JumpFJ<bits<6> op, string instr_asm>: - FJ<op, (outs), (ins jmptarget:$target), - !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch> { - let isBranch=1; +class JumpFJ<bits<6> op, DAGOperand opnd, string instr_asm, + SDPatternOperator operator, SDPatternOperator targetoperator>: + FJ<op, (outs), (ins opnd:$target), !strconcat(instr_asm, "\t$target"), + [(operator targetoperator:$target)], IIBranch> { let isTerminator=1; let isBarrier=1; let hasDelaySlot = 1; - let Predicates = [RelocStatic, HasStandardEncoding]; let DecoderMethod = "DecodeJumpTarget"; let Defs = [AT]; } @@ -625,21 +633,21 @@ class UncondBranch<bits<6> op, string instr_asm>: // Base class for indirect branch and return instruction classes. 
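
For reference, the load_a/store_a fragments deleted above gated instruction
selection on this predicate; a one-line hedged reconstruction:

// A natural-width access counted as "aligned" when its byte size did not
// exceed the alignment recorded on the memory operand.
static bool isAlignedAccess(unsigned SizeInBits, unsigned Alignment) {
  return SizeInBits / 8 <= Alignment;
}

With the distinction dropped, plain load/store patterns now select LW/SW and
friends, and the separate ULH/ULW/USH/USW definitions are removed below.
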
let isTerminator=1, isBarrier=1, hasDelaySlot = 1 in -class JumpFR<RegisterClass RC, list<dag> pattern>: - FR<0, 0x8, (outs), (ins RC:$rs), "jr\t$rs", pattern, IIBranch> { +class JumpFR<RegisterClass RC, SDPatternOperator operator = null_frag>: + FR<0, 0x8, (outs), (ins RC:$rs), "jr\t$rs", [(operator RC:$rs)], IIBranch> { let rt = 0; let rd = 0; let shamt = 0; } // Indirect branch -class IndirectBranch<RegisterClass RC>: JumpFR<RC, [(brind RC:$rs)]> { +class IndirectBranch<RegisterClass RC>: JumpFR<RC, brind> { let isBranch = 1; let isIndirectBranch = 1; } // Return instruction -class RetBase<RegisterClass RC>: JumpFR<RC, []> { +class RetBase<RegisterClass RC>: JumpFR<RC> { let isReturn = 1; let isCodeGenOnly = 1; let hasCtrlDep = 1; @@ -905,12 +913,28 @@ let usesCustomInserter = 1 in { // Instruction definition //===----------------------------------------------------------------------===// +class LoadImm32< string instr_asm, Operand Od, RegisterClass RC> : + MipsAsmPseudoInst<(outs RC:$rt), (ins Od:$imm32), + !strconcat(instr_asm, "\t$rt, $imm32")> ; +def LoadImm32Reg : LoadImm32<"li", shamt,CPURegs>; + +class LoadAddress<string instr_asm, Operand MemOpnd, RegisterClass RC> : + MipsAsmPseudoInst<(outs RC:$rt), (ins MemOpnd:$addr), + !strconcat(instr_asm, "\t$rt, $addr")> ; +def LoadAddr32Reg : LoadAddress<"la", mem, CPURegs>; + +class LoadAddressImm<string instr_asm, Operand Od, RegisterClass RC> : + MipsAsmPseudoInst<(outs RC:$rt), (ins Od:$imm32), + !strconcat(instr_asm, "\t$rt, $imm32")> ; +def LoadAddr32Imm : LoadAddressImm<"la", shamt,CPURegs>; + //===----------------------------------------------------------------------===// // MipsI Instructions //===----------------------------------------------------------------------===// /// Arithmetic Instructions (ALU Immediate) -def ADDiu : ArithLogicI<0x09, "addiu", add, simm16, immSExt16, CPURegs>; +def ADDiu : ArithLogicI<0x09, "addiu", add, simm16, immSExt16, CPURegs>, + IsAsCheapAsAMove; def ADDi : ArithOverflowI<0x08, "addi", add, simm16, immSExt16, CPURegs>; def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16, CPURegs>; def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16, CPURegs>; @@ -949,19 +973,12 @@ let Predicates = [HasMips32r2, HasStandardEncoding] in { /// aligned defm LB : LoadM32<0x20, "lb", sextloadi8>; defm LBu : LoadM32<0x24, "lbu", zextloadi8>; -defm LH : LoadM32<0x21, "lh", sextloadi16_a>; -defm LHu : LoadM32<0x25, "lhu", zextloadi16_a>; -defm LW : LoadM32<0x23, "lw", load_a>; +defm LH : LoadM32<0x21, "lh", sextloadi16>; +defm LHu : LoadM32<0x25, "lhu", zextloadi16>; +defm LW : LoadM32<0x23, "lw", load>; defm SB : StoreM32<0x28, "sb", truncstorei8>; -defm SH : StoreM32<0x29, "sh", truncstorei16_a>; -defm SW : StoreM32<0x2b, "sw", store_a>; - -/// unaligned -defm ULH : LoadM32<0x21, "ulh", sextloadi16_u, 1>; -defm ULHu : LoadM32<0x25, "ulhu", zextloadi16_u, 1>; -defm ULW : LoadM32<0x23, "ulw", load_u, 1>; -defm USH : StoreM32<0x29, "ush", truncstorei16_u, 1>; -defm USW : StoreM32<0x2b, "usw", store_u, 1>; +defm SH : StoreM32<0x29, "sh", truncstorei16>; +defm SW : StoreM32<0x2b, "sw", store>; /// load/store left/right defm LWL : LoadLeftRightM32<0x22, "lwl", MipsLWL>; @@ -996,7 +1013,8 @@ def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>, } /// Jump and Branch Instructions -def J : JumpFJ<0x02, "j">; +def J : JumpFJ<0x02, jmptarget, "j", br, bb>, + Requires<[RelocStatic, HasStandardEncoding]>, IsBranch; def JR : IndirectBranch<CPURegs>; def B : UncondBranch<0x04, "b">; def BEQ : CBranch<0x04, "beq", seteq, 
CPURegs>; @@ -1014,6 +1032,8 @@ def JAL : JumpLink<0x03, "jal">; def JALR : JumpLinkReg<0x00, 0x09, "jalr", CPURegs>; def BGEZAL : BranchLink<"bgezal", 0x11, CPURegs>; def BLTZAL : BranchLink<"bltzal", 0x10, CPURegs>; +def TAILCALL : JumpFJ<0x02, calltarget, "j", MipsTailCall, imm>, IsTailCall; +def TAILCALL_R : JumpFR<CPURegs, MipsTailCall>, IsTailCall; def RET : RetBase<CPURegs>; @@ -1072,6 +1092,26 @@ def EXT : ExtBase<0, "ext", CPURegs>; def INS : InsBase<4, "ins", CPURegs>; //===----------------------------------------------------------------------===// +// Instruction aliases +//===----------------------------------------------------------------------===// +def : InstAlias<"move $dst,$src", (ADD CPURegs:$dst,CPURegs:$src,ZERO)>; +def : InstAlias<"bal $offset", (BGEZAL RA,brtarget:$offset)>; +def : InstAlias<"addu $rs,$rt,$imm", + (ADDiu CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; +def : InstAlias<"add $rs,$rt,$imm", + (ADDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; +def : InstAlias<"and $rs,$rt,$imm", + (ANDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; +def : InstAlias<"j $rs", (JR CPURegs:$rs)>; +def : InstAlias<"not $rt,$rs", (NOR CPURegs:$rt,CPURegs:$rs,ZERO)>; +def : InstAlias<"neg $rt,$rs", (SUB CPURegs:$rt,ZERO,CPURegs:$rs)>; +def : InstAlias<"negu $rt,$rs", (SUBu CPURegs:$rt,ZERO,CPURegs:$rs)>; +def : InstAlias<"slt $rs,$rt,$imm", + (SLTi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; +def : InstAlias<"xor $rs,$rt,$imm", + (XORi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; + +//===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// @@ -1103,6 +1143,11 @@ def : MipsPat<(MipsJmpLink (i32 texternalsym:$dst)), //def : MipsPat<(MipsJmpLink CPURegs:$dst), // (JALR CPURegs:$dst)>; +// Tail call +def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)), + (TAILCALL tglobaladdr:$dst)>; +def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)), + (TAILCALL texternalsym:$dst)>; // hi/lo relocs def : MipsPat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>; def : MipsPat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>; @@ -1153,24 +1198,20 @@ def : MipsPat<(not CPURegs:$in), let Predicates = [NotN64, HasStandardEncoding] in { def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>; def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>; - def : MipsPat<(i32 (extloadi16_a addr:$src)), (LHu addr:$src)>; - def : MipsPat<(i32 (extloadi16_u addr:$src)), (ULHu addr:$src)>; + def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu addr:$src)>; } let Predicates = [IsN64, HasStandardEncoding] in { def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>; def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>; - def : MipsPat<(i32 (extloadi16_a addr:$src)), (LHu_P8 addr:$src)>; - def : MipsPat<(i32 (extloadi16_u addr:$src)), (ULHu_P8 addr:$src)>; + def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu_P8 addr:$src)>; } // peepholes let Predicates = [NotN64, HasStandardEncoding] in { - def : MipsPat<(store_a (i32 0), addr:$dst), (SW ZERO, addr:$dst)>; - def : MipsPat<(store_u (i32 0), addr:$dst), (USW ZERO, addr:$dst)>; + def : MipsPat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>; } let Predicates = [IsN64, HasStandardEncoding] in { - def : MipsPat<(store_a (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>; - def : MipsPat<(store_u (i32 0), addr:$dst), (USW_P8 ZERO, addr:$dst)>; + def : MipsPat<(store (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>; } // 
brcond patterns @@ -1265,3 +1306,8 @@ include "MipsCondMov.td" include "Mips16InstrFormats.td" include "Mips16InstrInfo.td" + +// DSP +include "MipsDSPInstrFormats.td" +include "MipsDSPInstrInfo.td" + diff --git a/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp index f78203f..5d9f0cf 100644 --- a/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp @@ -10,6 +10,10 @@ // This pass expands a branch or jump instruction into a long branch if its // offset is too large to fit into its immediate field. // +// FIXME: +// 1. Fix pc-region jump instructions which cross 256MB segment boundaries. +// 2. If program has inline assembly statements whose size cannot be +// determined accurately, load branch target addresses from the GOT. //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-long-branch" @@ -48,7 +52,7 @@ namespace { typedef MachineBasicBlock::reverse_iterator ReverseIter; struct MBBInfo { - uint64_t Size; + uint64_t Size, Address; bool HasLongBranch; MachineInstr *Br; @@ -61,7 +65,10 @@ namespace { static char ID; MipsLongBranch(TargetMachine &tm) : MachineFunctionPass(ID), TM(tm), - TII(static_cast<const MipsInstrInfo*>(tm.getInstrInfo())) {} + TII(static_cast<const MipsInstrInfo*>(tm.getInstrInfo())), + IsPIC(TM.getRelocationModel() == Reloc::PIC_), + ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()), + LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 13 : 9)) {} virtual const char *getPassName() const { return "Mips Long Branch"; @@ -81,6 +88,9 @@ namespace { const MipsInstrInfo *TII; MachineFunction *MF; SmallVector<MBBInfo, 16> MBBInfos; + bool IsPIC; + unsigned ABI; + unsigned LongBranchSeqSize; }; char MipsLongBranch::ID = 0; @@ -230,12 +240,6 @@ void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br, // Expand branch instructions to long branches. void MipsLongBranch::expandToLongBranch(MBBInfo &I) { - I.HasLongBranch = true; - - bool IsPIC = TM.getRelocationModel() == Reloc::PIC_; - unsigned ABI = TM.getSubtarget<MipsSubtarget>().getTargetABI(); - bool N64 = ABI == MipsSubtarget::N64; - MachineBasicBlock::iterator Pos; MachineBasicBlock *MBB = I.Br->getParent(), *TgtMBB = getTargetMBB(*I.Br); DebugLoc DL = I.Br->getDebugLoc(); @@ -248,101 +252,105 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { MBB->addSuccessor(LongBrMBB); if (IsPIC) { - // $longbr: - // addiu $sp, $sp, -regsize * 2 - // sw $ra, 0($sp) - // bal $baltgt - // sw $a3, regsize($sp) - // $baltgt: - // lui $a3, %hi($baltgt) - // lui $at, %hi($tgt) - // addiu $a3, $a3, %lo($baltgt) - // addiu $at, $at, %lo($tgt) - // subu $at, $at, $a3 - // addu $at, $ra, $at - // - // if n64: - // lui $a3, %highest($baltgt) - // lui $ra, %highest($tgt) - // addiu $a3, $a3, %higher($baltgt) - // addiu $ra, $ra, %higher($tgt) - // dsll $a3, $a3, 32 - // dsll $ra, $ra, 32 - // subu $at, $at, $a3 - // addu $at, $at, $ra - // - // lw $ra, 0($sp) - // lw $a3, regsize($sp) - // jr $at - // addiu $sp, $sp, regsize * 2 - // $fallthrough: - // - MF->getInfo<MipsFunctionInfo>()->setEmitNOAT(); MachineBasicBlock *BalTgtMBB = MF->CreateMachineBasicBlock(BB); MF->insert(FallThroughMBB, BalTgtMBB); LongBrMBB->addSuccessor(BalTgtMBB); BalTgtMBB->addSuccessor(TgtMBB); - int RegSize = N64 ? 8 : 4; - unsigned AT = N64 ? Mips::AT_64 : Mips::AT; - unsigned A3 = N64 ? Mips::A3_64 : Mips::A3; - unsigned SP = N64 ? Mips::SP_64 : Mips::SP; - unsigned RA = N64 ? 
Mips::RA_64 : Mips::RA; - unsigned Load = N64 ? Mips::LD_P8 : Mips::LW; - unsigned Store = N64 ? Mips::SD_P8 : Mips::SW; - unsigned LUi = N64 ? Mips::LUi64 : Mips::LUi; - unsigned ADDiu = N64 ? Mips::DADDiu : Mips::ADDiu; - unsigned ADDu = N64 ? Mips::DADDu : Mips::ADDu; - unsigned SUBu = N64 ? Mips::SUBu : Mips::SUBu; - unsigned JR = N64 ? Mips::JR64 : Mips::JR; - - Pos = LongBrMBB->begin(); - - BuildMI(*LongBrMBB, Pos, DL, TII->get(ADDiu), SP).addReg(SP) - .addImm(-RegSize * 2); - BuildMI(*LongBrMBB, Pos, DL, TII->get(Store)).addReg(RA).addReg(SP) - .addImm(0); - BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB); - BuildMI(*LongBrMBB, Pos, DL, TII->get(Store)).addReg(A3).addReg(SP) - .addImm(RegSize)->setIsInsideBundle(); - - Pos = BalTgtMBB->begin(); - - BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), A3) - .addMBB(BalTgtMBB, MipsII::MO_ABS_HI); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), AT) - .addMBB(TgtMBB, MipsII::MO_ABS_HI); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), A3).addReg(A3) - .addMBB(BalTgtMBB, MipsII::MO_ABS_LO); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), AT).addReg(AT) - .addMBB(TgtMBB, MipsII::MO_ABS_LO); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(SUBu), AT).addReg(AT).addReg(A3); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDu), AT).addReg(RA).addReg(AT); - - if (N64) { - BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), A3) - .addMBB(BalTgtMBB, MipsII::MO_HIGHEST); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), RA) - .addMBB(TgtMBB, MipsII::MO_HIGHEST); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), A3).addReg(A3) - .addMBB(BalTgtMBB, MipsII::MO_HIGHER); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), RA).addReg(RA) - .addMBB(TgtMBB, MipsII::MO_HIGHER); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DSLL), A3).addReg(A3) - .addImm(32); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DSLL), RA).addReg(RA) - .addImm(32); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(SUBu), AT).addReg(AT).addReg(A3); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDu), AT).addReg(AT).addReg(RA); - I.Size += 4 * 8; + int64_t TgtAddress = MBBInfos[TgtMBB->getNumber()].Address; + int64_t Offset = TgtAddress - (I.Address + I.Size - 20); + int64_t Lo = SignExtend64<16>(Offset & 0xffff); + int64_t Hi = SignExtend64<16>(((Offset + 0x8000) >> 16) & 0xffff); + + if (ABI != MipsSubtarget::N64) { + // $longbr: + // addiu $sp, $sp, -8 + // sw $ra, 0($sp) + // bal $baltgt + // lui $at, %hi($tgt - $baltgt) + // $baltgt: + // addiu $at, $at, %lo($tgt - $baltgt) + // addu $at, $ra, $at + // lw $ra, 0($sp) + // jr $at + // addiu $sp, $sp, 8 + // $fallthrough: + // + + Pos = LongBrMBB->begin(); + + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP) + .addReg(Mips::SP).addImm(-8); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SW)).addReg(Mips::RA) + .addReg(Mips::SP).addImm(0); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LUi), Mips::AT).addImm(Hi) + ->setIsInsideBundle(); + + Pos = BalTgtMBB->begin(); + + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::AT) + .addReg(Mips::AT).addImm(Lo); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDu), Mips::AT) + .addReg(Mips::RA).addReg(Mips::AT); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LW), Mips::RA) + .addReg(Mips::SP).addImm(0); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP) + .addReg(Mips::SP).addImm(8)->setIsInsideBundle(); + } else { + // $longbr: + // daddiu $sp, $sp, -16 + 
// sd $ra, 0($sp) + // lui64 $at, %highest($tgt - $baltgt) + // daddiu $at, $at, %higher($tgt - $baltgt) + // dsll $at, $at, 16 + // daddiu $at, $at, %hi($tgt - $baltgt) + // bal $baltgt + // dsll $at, $at, 16 + // $baltgt: + // daddiu $at, $at, %lo($tgt - $baltgt) + // daddu $at, $ra, $at + // ld $ra, 0($sp) + // jr64 $at + // daddiu $sp, $sp, 16 + // $fallthrough: + // + + int64_t Higher = SignExtend64<16>(((Offset + 0x80008000) >> 32) & 0xffff); + int64_t Highest = + SignExtend64<16>(((Offset + 0x800080008000LL) >> 48) & 0xffff); + + Pos = LongBrMBB->begin(); + + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64) + .addReg(Mips::SP_64).addImm(-16); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SD)).addReg(Mips::RA_64) + .addReg(Mips::SP_64).addImm(0); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LUi64), Mips::AT_64) + .addImm(Highest); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64) + .addReg(Mips::AT_64).addImm(Higher); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64) + .addReg(Mips::AT_64).addImm(16); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64) + .addReg(Mips::AT_64).addImm(Hi); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64) + .addReg(Mips::AT_64).addImm(16)->setIsInsideBundle(); + + Pos = BalTgtMBB->begin(); + + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64) + .addReg(Mips::AT_64).addImm(Lo); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDu), Mips::AT_64) + .addReg(Mips::RA_64).addReg(Mips::AT_64); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LD), Mips::RA_64) + .addReg(Mips::SP_64).addImm(0); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64) + .addReg(Mips::SP_64).addImm(16)->setIsInsideBundle(); } - - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Load), RA).addReg(SP).addImm(0); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Load), A3).addReg(SP).addImm(RegSize); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(JR)).addReg(AT); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), SP).addReg(SP) - .addImm(RegSize * 2)->setIsInsideBundle(); - I.Size += 4 * 14; } else { // $longbr: // j $tgt @@ -353,7 +361,6 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { LongBrMBB->addSuccessor(TgtMBB); BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::J)).addMBB(TgtMBB); BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::NOP))->setIsInsideBundle(); - I.Size += 4 * 2; } if (I.Br->isUnconditionalBranch()) { @@ -401,19 +408,34 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) { if (!I->Br || I->HasLongBranch) continue; - if (!ForceLongBranch) - // Check if offset fits into 16-bit immediate field of branches. - if (isInt<16>(computeOffset(I->Br) / 4)) - continue; + // Check if offset fits into 16-bit immediate field of branches. + if (!ForceLongBranch && isInt<16>(computeOffset(I->Br) / 4)) + continue; - expandToLongBranch(*I); + I->HasLongBranch = true; + I->Size += LongBranchSeqSize * 4; ++LongBranches; EverMadeChange = MadeChange = true; } } - if (EverMadeChange) - MF->RenumberBlocks(); + if (!EverMadeChange) + return true; + + // Compute basic block addresses. + if (TM.getRelocationModel() == Reloc::PIC_) { + uint64_t Address = 0; + + for (I = MBBInfos.begin(); I != E; Address += I->Size, ++I) + I->Address = Address; + } + + // Do the expansion. 
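Before the expansion loop runs, it is worth pinning down the %hi/%lo arithmetic used in both sequences above. A minimal worked sketch (the offset value is assumed; only SignExtend64 from llvm/Support/MathExtras.h is real): adding 0x8000 before each shift pre-compensates for the sign extension of the half below it, so lui/addiu (or their 64-bit variants) reassemble the exact offset:

    // Sketch, not part of the commit; offset chosen to exercise the carry.
    int64_t Offset = 0x18000;                                          // assumed
    int64_t Lo = SignExtend64<16>(Offset & 0xffff);                    // -0x8000
    int64_t Hi = SignExtend64<16>(((Offset + 0x8000) >> 16) & 0xffff); // 2
    // lui $at, Hi ; addiu $at, $at, Lo  ==>  (2 << 16) + (-0x8000) == 0x18000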
+ for (I = MBBInfos.begin(); I != E; ++I) + if (I->HasLongBranch) + expandToLongBranch(*I); + + MF->RenumberBlocks(); return true; } diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp index d4c5e6d..5fa6339 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp @@ -11,7 +11,6 @@ // MCInst records. // //===----------------------------------------------------------------------===// - #include "MipsMCInstLower.h" #include "MipsAsmPrinter.h" #include "MipsInstrInfo.h" @@ -161,31 +160,3 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { } } -// If the D<shift> instruction has a shift amount that is greater -// than 31 (checked in calling routine), lower it to a D<shift>32 instruction -void MipsMCInstLower::LowerLargeShift(const MachineInstr *MI, - MCInst& Inst, - int64_t Shift) { - // rt - Inst.addOperand(LowerOperand(MI->getOperand(0))); - // rd - Inst.addOperand(LowerOperand(MI->getOperand(1))); - // saminus32 - Inst.addOperand(MCOperand::CreateImm(Shift)); - - switch (MI->getOpcode()) { - default: - // Calling function is not synchronized - llvm_unreachable("Unexpected shift instruction"); - break; - case Mips::DSLL: - Inst.setOpcode(Mips::DSLL32); - break; - case Mips::DSRL: - Inst.setOpcode(Mips::DSRL32); - break; - case Mips::DSRA: - Inst.setOpcode(Mips::DSRA32); - break; - } -} diff --git a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h index 0abb996..c4a6016 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h +++ b/contrib/llvm/lib/Target/Mips/MipsMCInstLower.h @@ -33,12 +33,11 @@ public: MipsMCInstLower(MipsAsmPrinter &asmprinter); void Initialize(Mangler *mang, MCContext *C); void Lower(const MachineInstr *MI, MCInst &OutMI) const; - void LowerLargeShift(const MachineInstr *MI, MCInst &Inst, int64_t Shift); + MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const; private: MCOperand LowerSymbolOperand(const MachineOperand &MO, MachineOperandType MOTy, unsigned Offset) const; - MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const; }; } diff --git a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp index 362173e..5ff19ab 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp @@ -43,4 +43,17 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() { return GlobalBaseReg = MF.getRegInfo().createVirtualRegister(RC); } +bool MipsFunctionInfo::mips16SPAliasRegSet() const { + return Mips16SPAliasReg; +} +unsigned MipsFunctionInfo::getMips16SPAliasReg() { + // Return if it has already been initialized. + if (Mips16SPAliasReg) + return Mips16SPAliasReg; + + const TargetRegisterClass *RC; + RC=(const TargetRegisterClass*)&Mips::CPU16RegsRegClass; + return Mips16SPAliasReg = MF.getRegInfo().createVirtualRegister(RC); +} + void MipsFunctionInfo::anchor() { } diff --git a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h index df3c4c0..bb45f92 100644 --- a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h +++ b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h @@ -39,55 +39,45 @@ class MipsFunctionInfo : public MachineFunctionInfo { /// relocation models. 
unsigned GlobalBaseReg; + /// Mips16SPAliasReg - keeps track of the virtual register initialized for + /// use as an alias for SP for use in load/store of halfword/byte from/to + /// the stack + unsigned Mips16SPAliasReg; + /// VarArgsFrameIndex - FrameIndex for start of varargs area. int VarArgsFrameIndex; - // Range of frame object indices. - // InArgFIRange: Range of indices of all frame objects created during call to - // LowerFormalArguments. - // OutArgFIRange: Range of indices of all frame objects created during call to - // LowerCall except for the frame object for restoring $gp. - std::pair<int, int> InArgFIRange, OutArgFIRange; - unsigned MaxCallFrameSize; + /// True if function has a byval argument. + bool HasByvalArg; - bool EmitNOAT; + /// Size of incoming argument area. + unsigned IncomingArgSize; public: MipsFunctionInfo(MachineFunction& MF) - : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), - VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), - OutArgFIRange(std::make_pair(-1, 0)), MaxCallFrameSize(0), EmitNOAT(false) + : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), Mips16SPAliasReg(0), + VarArgsFrameIndex(0) {} - bool isInArgFI(int FI) const { - return FI <= InArgFIRange.first && FI >= InArgFIRange.second; - } - void setLastInArgFI(int FI) { InArgFIRange.second = FI; } - - bool isOutArgFI(int FI) const { - return FI <= OutArgFIRange.first && FI >= OutArgFIRange.second; - } - void extendOutArgFIRange(int FirstFI, int LastFI) { - if (!OutArgFIRange.second) - // this must be the first time this function was called. - OutArgFIRange.first = FirstFI; - OutArgFIRange.second = LastFI; - } - unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } bool globalBaseRegSet() const; unsigned getGlobalBaseReg(); + bool mips16SPAliasRegSet() const; + unsigned getMips16SPAliasReg(); + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } - unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; } - void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; } + bool hasByvalArg() const { return HasByvalArg; } + void setFormalArgInfo(unsigned Size, bool HasByval) { + IncomingArgSize = Size; + HasByvalArg = HasByval; + } - bool getEmitNOAT() const { return EmitNOAT; } - void setEmitNOAT() { EmitNOAT = true; } + unsigned getIncomingArgSize() const { return IncomingArgSize; } }; } // end of namespace llvm diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp index ae6ae3a..d8e0dd4 100644 --- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp @@ -22,7 +22,6 @@ #include "llvm/Constants.h" #include "llvm/DebugInfo.h" #include "llvm/Type.h" -#include "llvm/Function.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" @@ -43,9 +42,8 @@ using namespace llvm; -MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST, - const TargetInstrInfo &tii) - : MipsGenRegisterInfo(Mips::RA), Subtarget(ST), TII(tii) {} +MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST) + : MipsGenRegisterInfo(Mips::RA), Subtarget(ST) {} unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; } @@ -83,11 +81,11 @@ MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const { BitVector MipsRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { static const 
uint16_t ReservedCPURegs[] = { - Mips::ZERO, Mips::AT, Mips::K0, Mips::K1, Mips::SP + Mips::ZERO, Mips::K0, Mips::K1, Mips::SP }; static const uint16_t ReservedCPU64Regs[] = { - Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64, Mips::SP_64 + Mips::ZERO_64, Mips::K0_64, Mips::K1_64, Mips::SP_64 }; BitVector Reserved(getNumRegs()); @@ -96,41 +94,49 @@ getReservedRegs(const MachineFunction &MF) const { for (unsigned I = 0; I < array_lengthof(ReservedCPURegs); ++I) Reserved.set(ReservedCPURegs[I]); - if (Subtarget.hasMips64()) { - for (unsigned I = 0; I < array_lengthof(ReservedCPU64Regs); ++I) - Reserved.set(ReservedCPU64Regs[I]); + for (unsigned I = 0; I < array_lengthof(ReservedCPU64Regs); ++I) + Reserved.set(ReservedCPU64Regs[I]); + if (Subtarget.hasMips64()) { // Reserve all registers in AFGR64. for (RegIter Reg = Mips::AFGR64RegClass.begin(), EReg = Mips::AFGR64RegClass.end(); Reg != EReg; ++Reg) Reserved.set(*Reg); } else { - // Reserve all registers in CPU64Regs & FGR64. - for (RegIter Reg = Mips::CPU64RegsRegClass.begin(), - EReg = Mips::CPU64RegsRegClass.end(); Reg != EReg; ++Reg) - Reserved.set(*Reg); - + // Reserve all registers in FGR64. for (RegIter Reg = Mips::FGR64RegClass.begin(), EReg = Mips::FGR64RegClass.end(); Reg != EReg; ++Reg) Reserved.set(*Reg); } - // Reserve FP if this function should have a dedicated frame pointer register. if (MF.getTarget().getFrameLowering()->hasFP(MF)) { - Reserved.set(Mips::FP); - Reserved.set(Mips::FP_64); + if (Subtarget.inMips16Mode()) + Reserved.set(Mips::S0); + else { + Reserved.set(Mips::FP); + Reserved.set(Mips::FP_64); + } } // Reserve hardware registers. Reserved.set(Mips::HWR29); Reserved.set(Mips::HWR29_64); + // Reserve DSP control register. + Reserved.set(Mips::DSPCtrl); + // Reserve RA if in mips16 mode. if (Subtarget.inMips16Mode()) { Reserved.set(Mips::RA); Reserved.set(Mips::RA_64); } + // Reserve GP if small section is used. + if (Subtarget.useSmallSection()) { + Reserved.set(Mips::GP); + Reserved.set(Mips::GP_64); + } + return Reserved; } @@ -160,7 +166,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, "Instr doesn't have FrameIndex operand!"); } - DEBUG(errs() << "\nFunction : " << MF.getFunction()->getName() << "\n"; + DEBUG(errs() << "\nFunction : " << MF.getName() << "\n"; errs() << "<--------->\n" << MI); int FrameIndex = MI.getOperand(i).getIndex(); @@ -179,8 +185,12 @@ getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); bool IsN64 = Subtarget.isABI_N64(); - return TFI->hasFP(MF) ? (IsN64 ? Mips::FP_64 : Mips::FP) : - (IsN64 ? Mips::SP_64 : Mips::SP); + if (Subtarget.inMips16Mode()) + return TFI->hasFP(MF) ? Mips::S0 : Mips::SP; + else + return TFI->hasFP(MF) ? (IsN64 ? Mips::FP_64 : Mips::FP) : + (IsN64 ? Mips::SP_64 : Mips::SP); + } unsigned MipsRegisterInfo:: diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h index 9a05e94..78adf7f 100644 --- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h +++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h @@ -22,16 +22,14 @@ namespace llvm { class MipsSubtarget; -class TargetInstrInfo; class Type; class MipsRegisterInfo : public MipsGenRegisterInfo { protected: const MipsSubtarget &Subtarget; - const TargetInstrInfo &TII; public: - MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii); + MipsRegisterInfo(const MipsSubtarget &Subtarget); /// getRegisterNumbering - Given the enum value for some register, e.g. 
/// Mips::RA, return the number that it corresponds to (e.g. 31). diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td index b255e42..391c19e 100644 --- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td @@ -14,6 +14,8 @@ let Namespace = "Mips" in { def sub_fpeven : SubRegIndex; def sub_fpodd : SubRegIndex; def sub_32 : SubRegIndex; +def sub_lo : SubRegIndex; +def sub_hi : SubRegIndex; } // We have banks of 32 registers each. @@ -71,7 +73,7 @@ class HWR<bits<5> num, string n> : MipsReg<n> { let Namespace = "Mips" in { // General Purpose Registers def ZERO : MipsGPRReg< 0, "zero">, DwarfRegNum<[0]>; - def AT : MipsGPRReg< 1, "at">, DwarfRegNum<[1]>; + def AT : MipsGPRReg< 1, "1">, DwarfRegNum<[1]>; def V0 : MipsGPRReg< 2, "2">, DwarfRegNum<[2]>; def V1 : MipsGPRReg< 3, "3">, DwarfRegNum<[3]>; def A0 : MipsGPRReg< 4, "4">, DwarfRegNum<[4]>; @@ -105,7 +107,7 @@ let Namespace = "Mips" in { // General Purpose 64-bit Registers def ZERO_64 : Mips64GPRReg< 0, "zero", [ZERO]>, DwarfRegNum<[0]>; - def AT_64 : Mips64GPRReg< 1, "at", [AT]>, DwarfRegNum<[1]>; + def AT_64 : Mips64GPRReg< 1, "1", [AT]>, DwarfRegNum<[1]>; def V0_64 : Mips64GPRReg< 2, "2", [V0]>, DwarfRegNum<[2]>; def V1_64 : Mips64GPRReg< 3, "3", [V1]>, DwarfRegNum<[3]>; def A0_64 : Mips64GPRReg< 4, "4", [A0]>, DwarfRegNum<[4]>; @@ -239,16 +241,29 @@ let Namespace = "Mips" in { // fcc0 register def FCC0 : Register<"fcc0">; + // PC register + def PC : Register<"pc">; + // Hardware register $29 def HWR29 : Register<"29">; def HWR29_64 : Register<"29">; + + // Accum registers + let SubRegIndices = [sub_lo, sub_hi] in + def AC0 : RegisterWithSubRegs<"ac0", [LO, HI]>; + def AC1 : Register<"ac1">; + def AC2 : Register<"ac2">; + def AC3 : Register<"ac3">; + + def DSPCtrl : Register<"dspctrl">; } //===----------------------------------------------------------------------===// // Register Classes //===----------------------------------------------------------------------===// -def CPURegs : RegisterClass<"Mips", [i32], 32, (add +class CPURegsClass<list<ValueType> regTypes> : + RegisterClass<"Mips", regTypes, 32, (add // Reserved ZERO, AT, // Return Values and Arguments @@ -262,6 +277,9 @@ def CPURegs : RegisterClass<"Mips", [i32], 32, (add // Reserved K0, K1, GP, SP, FP, RA)>; +def CPURegs : CPURegsClass<[i32]>; +def DSPRegs : CPURegsClass<[v4i8, v2i16]>; + def CPU64Regs : RegisterClass<"Mips", [i64], 64, (add // Reserved ZERO_64, AT_64, @@ -284,6 +302,7 @@ def CPU16Regs : RegisterClass<"Mips", [i32], 32, (add def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>; +def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>; // 64bit fp: // * FGR64 - 32 64-bit registers @@ -319,3 +338,5 @@ def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>; def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>; def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>; +// Accumulator Registers +def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>; diff --git a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp index 1c59847..03f5176 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -22,7 +22,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetData.h" 
+#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" @@ -202,6 +203,19 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Mark $fp as used if function has dedicated frame pointer. if (hasFP(MF)) MRI.setPhysRegUsed(FP); + + // Set scavenging frame index if necessary. + uint64_t MaxSPOffset = MF.getInfo<MipsFunctionInfo>()->getIncomingArgSize() + + estimateStackSize(MF); + + if (isInt<16>(MaxSPOffset)) + return; + + const TargetRegisterClass *RC = STI.isABI_N64() ? + &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass; + int FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(), + RC->getAlignment(), false); + RS->setScavengingFrameIndex(FI); } const MipsFrameLowering * diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp index eeb1de3..fb0f9df 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -260,14 +260,55 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, if (isInt<16>(Amount))// addi sp, sp, amount BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount); else { // Expand immediate that doesn't fit in 16-bit. - unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT; - - MBB.getParent()->getInfo<MipsFunctionInfo>()->setEmitNOAT(); - Mips::loadImmediate(Amount, STI.isABI_N64(), *this, MBB, I, DL, false, 0); - BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(ATReg); + unsigned Reg = loadImmediate(Amount, MBB, I, DL, 0); + BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(Reg, RegState::Kill); } } +/// This function generates the sequence of instructions needed to get the +/// result of adding register REG and immediate IMM. +unsigned +MipsSEInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB, + MachineBasicBlock::iterator II, DebugLoc DL, + unsigned *NewImm) const { + MipsAnalyzeImmediate AnalyzeImm; + const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>(); + MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo(); + unsigned Size = STI.isABI_N64() ? 64 : 32; + unsigned LUi = STI.isABI_N64() ? Mips::LUi64 : Mips::LUi; + unsigned ZEROReg = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; + const TargetRegisterClass *RC = STI.isABI_N64() ? + &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass; + bool LastInstrIsADDiu = NewImm; + + const MipsAnalyzeImmediate::InstSeq &Seq = + AnalyzeImm.Analyze(Imm, Size, LastInstrIsADDiu); + MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); + + assert(Seq.size() && (!LastInstrIsADDiu || (Seq.size() > 1))); + + // The first instruction can be a LUi, which is different from other + // instructions (ADDiu, ORI and SLL) in that it does not have a register + // operand. + unsigned Reg = RegInfo.createVirtualRegister(RC); + + if (Inst->Opc == LUi) + BuildMI(MBB, II, DL, get(LUi), Reg).addImm(SignExtend64<16>(Inst->ImmOpnd)); + else + BuildMI(MBB, II, DL, get(Inst->Opc), Reg).addReg(ZEROReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + + // Build the remaining instructions in Seq. 
+ for (++Inst; Inst != Seq.end() - LastInstrIsADDiu; ++Inst) + BuildMI(MBB, II, DL, get(Inst->Opc), Reg).addReg(Reg, RegState::Kill) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + + if (LastInstrIsADDiu) + *NewImm = Inst->ImmOpnd; + + return Reg; +} + unsigned MipsSEInstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h index 346e74d..55b78b2 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h +++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h @@ -15,7 +15,6 @@ #define MIPSSEINSTRUCTIONINFO_H #include "MipsInstrInfo.h" -#include "MipsAnalyzeImmediate.h" #include "MipsSERegisterInfo.h" namespace llvm { @@ -70,6 +69,13 @@ public: void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; + /// Emit a series of instructions to load an immediate. If NewImm is a + /// non-NULL parameter, the last instruction is not emitted, but instead + /// its immediate operand is returned in NewImm. + unsigned loadImmediate(int64_t Imm, MachineBasicBlock &MBB, + MachineBasicBlock::iterator II, DebugLoc DL, + unsigned *NewImm) const; + private: virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; diff --git a/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp index 043a1ef..56b9ba9 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -40,8 +41,18 @@ using namespace llvm; MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &ST, - const TargetInstrInfo &TII) - : MipsRegisterInfo(ST, TII) {} + const MipsSEInstrInfo &I) + : MipsRegisterInfo(ST), TII(I) {} + +bool MipsSERegisterInfo:: +requiresRegisterScavenging(const MachineFunction &MF) const { + return true; +} + +bool MipsSERegisterInfo:: +requiresFrameIndexScavenging(const MachineFunction &MF) const { + return true; +} // This function eliminate ADJCALLSTACKDOWN, // ADJCALLSTACKUP pseudo instructions @@ -72,7 +83,6 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); int MinCSFI = 0; @@ -91,8 +101,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, // getFrameRegister() returns. unsigned FrameReg; - if (MipsFI->isOutArgFI(FrameIndex) || - (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) + if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; else FrameReg = getFrameRegister(MF); @@ -104,14 +113,11 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, // - If the frame object is any of the following, its offset must be adjusted // by adding the size of the stack: // incoming argument, callee-saved register location or local variable. 
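Concretely, with assumed numbers: a 32-byte frame, a frame object at SPOffset -8, and an instruction immediate of 4 rewrite the operand pair to 28($sp), since after prologue allocation the object sits StackSize bytes above its incoming-SP-relative offset:

    // Offset = SPOffset + StackSize + Imm
    //        = -8       + 32        + 4   = 28   (values assumed)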
+ bool IsKill = false; int64_t Offset; - if (MipsFI->isOutArgFI(FrameIndex)) - Offset = SPOffset; - else - Offset = SPOffset + (int64_t)StackSize; - - Offset += MI.getOperand(OpNo + 1).getImm(); + Offset = SPOffset + (int64_t)StackSize; + Offset += MI.getOperand(OpNo + 1).getImm(); DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); @@ -121,18 +127,17 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = II->getDebugLoc(); unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT; - MipsAnalyzeImmediate::Inst LastInst(0, 0); + unsigned NewImm; - MipsFI->setEmitNOAT(); - Mips::loadImmediate(Offset, Subtarget.isABI_N64(), TII, MBB, II, DL, true, - &LastInst); - BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg); + unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL, &NewImm); + BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(FrameReg) + .addReg(Reg, RegState::Kill); - FrameReg = ATReg; - Offset = SignExtend64<16>(LastInst.ImmOpnd); + FrameReg = Reg; + Offset = SignExtend64<16>(NewImm); + IsKill = true; } - MI.getOperand(OpNo).ChangeToRegister(FrameReg, false); + MI.getOperand(OpNo).ChangeToRegister(FrameReg, false, false, IsKill); MI.getOperand(OpNo + 1).ChangeToImmediate(Offset); } diff --git a/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.h b/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.h index 4b17b33..7437bd3 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.h +++ b/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.h @@ -18,11 +18,18 @@ #include "MipsRegisterInfo.h" namespace llvm { +class MipsSEInstrInfo; class MipsSERegisterInfo : public MipsRegisterInfo { + const MipsSEInstrInfo &TII; + public: MipsSERegisterInfo(const MipsSubtarget &Subtarget, - const TargetInstrInfo &TII); + const MipsSEInstrInfo &TII); + + bool requiresRegisterScavenging(const MachineFunction &MF) const; + + bool requiresFrameIndexScavenging(const MachineFunction &MF) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp index 11ff809..930af4d 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp @@ -25,12 +25,14 @@ using namespace llvm; void MipsSubtarget::anchor() { } MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool little) : + const std::string &FS, bool little, + Reloc::Model RM) : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), - HasMinMax(false), HasSwap(false), HasBitCount(false), InMips16Mode(false) + HasMinMax(false), HasSwap(false), HasBitCount(false), InMips16Mode(false), + HasDSP(false), HasDSPR2(false), IsAndroid(false) { std::string CPUName = CPU; if (CPUName.empty()) @@ -54,6 +56,9 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, // Is the target system Linux ? if (TT.find("linux") == std::string::npos) IsLinux = false; + + // Set UseSmallSection. 
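As background for the flag initialized below (a sketch, not from the commit): small-section data is reachable with one $gp-relative access instead of a hi/lo pair, which is only sound under static relocation with a runtime that sets $gp, hence the !IsLinux requirement:

    // lw    $2, %gp_rel(sym)($gp)   ; small section, single instruction
    // lui   $2, %hi(sym)            ; general case needs two instructions
    // lw    $2, %lo(sym)($2)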
+ UseSmallSection = !IsLinux && (RM == Reloc::Static); } bool diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h index ba15362..ff69237 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h +++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h @@ -65,6 +65,9 @@ protected: // isLinux - Target system is Linux. Is false we consider ELFOS for now. bool IsLinux; + // UseSmallSection - Small section is used. + bool UseSmallSection; + /// Features related to the presence of specific instructions. // HasSEInReg - SEB and SEH (signext in register) instructions. @@ -89,6 +92,9 @@ protected: // InMips16 -- can process Mips16 instructions bool InMips16Mode; + // HasDSP, HasDSPR2 -- supports DSP ASE. + bool HasDSP, HasDSPR2; + // IsAndroid -- target is android bool IsAndroid; @@ -109,7 +115,7 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. MipsSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool little); + const std::string &FS, bool little, Reloc::Model RM); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. @@ -131,8 +137,11 @@ public: bool isNotSingleFloat() const { return !IsSingleFloat; } bool hasVFPU() const { return HasVFPU; } bool inMips16Mode() const { return InMips16Mode; } + bool hasDSP() const { return HasDSP; } + bool hasDSPR2() const { return HasDSPR2; } bool isAndroid() const { return IsAndroid; } bool isLinux() const { return IsLinux; } + bool useSmallSection() const { return UseSmallSection; } bool hasStandardEncoding() const { return !inMips16Mode(); } diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp index 03a024a..983ee21 100644 --- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp @@ -42,8 +42,8 @@ MipsTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL, bool isLittle) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, isLittle), - DataLayout(isLittle ? + Subtarget(TT, CPU, FS, isLittle, RM), + DL(isLittle ? (Subtarget.isABI_N64() ? 
"e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") : @@ -52,7 +52,8 @@ MipsTargetMachine(const Target &T, StringRef TT, "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")), InstrInfo(MipsInstrInfo::create(*this)), FrameLowering(MipsFrameLowering::create(*this, Subtarget)), - TLInfo(*this), TSInfo(*this), JITInfo() { + TLInfo(*this), TSInfo(*this), JITInfo(), + STTI(&TLInfo), VTTI(&TLInfo) { } void MipsebTargetMachine::anchor() { } diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h index 21b49e6..b54f5ce 100644 --- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h +++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h @@ -21,8 +21,9 @@ #include "MipsSelectionDAGInfo.h" #include "MipsSubtarget.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetTransformImpl.h" namespace llvm { class formatted_raw_ostream; @@ -30,12 +31,14 @@ class MipsRegisterInfo; class MipsTargetMachine : public LLVMTargetMachine { MipsSubtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment + const DataLayout DL; // Calculates type size & alignment const MipsInstrInfo *InstrInfo; const MipsFrameLowering *FrameLowering; MipsTargetLowering TLInfo; MipsSelectionDAGInfo TSInfo; MipsJITInfo JITInfo; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: MipsTargetMachine(const Target &T, StringRef TT, @@ -52,8 +55,8 @@ public: { return FrameLowering; } virtual const MipsSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual const TargetData *getTargetData() const - { return &DataLayout;} + virtual const DataLayout *getDataLayout() const + { return &DL;} virtual MipsJITInfo *getJITInfo() { return &JITInfo; } @@ -69,6 +72,13 @@ public: return &TSInfo; } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } + // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp index 04dc60a..881908b 100644 --- a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -13,7 +13,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ELF.h" @@ -26,6 +26,7 @@ SSThreshold("mips-ssection-threshold", cl::Hidden, void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); SmallDataSection = getContext().getELFSection(".sdata", ELF::SHT_PROGBITS, @@ -60,9 +61,10 @@ bool MipsTargetObjectFile:: IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, SectionKind Kind) const { - // Only use small section for non linux targets. const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>(); - if (Subtarget.isLinux()) + + // Return if small section is not available. 
+ if (!Subtarget.useSmallSection()) return false; // Only global variables, not functions. @@ -80,7 +82,7 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, return false; Type *Ty = GV->getType()->getElementType(); - return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty)); + return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty)); } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.td b/contrib/llvm/lib/Target/NVPTX/NVPTX.td index ae7710e..7aee359 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTX.td +++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.td @@ -24,7 +24,30 @@ include "NVPTXInstrInfo.td" // - Need at least one feature to avoid generating zero sized array by // TableGen in NVPTXGenSubtarget.inc. //===----------------------------------------------------------------------===// -def FeatureDummy : SubtargetFeature<"dummy", "dummy", "true", "">; + +// SM Versions +def SM10 : SubtargetFeature<"sm_10", "SmVersion", "10", + "Target SM 1.0">; +def SM11 : SubtargetFeature<"sm_11", "SmVersion", "11", + "Target SM 1.1">; +def SM12 : SubtargetFeature<"sm_12", "SmVersion", "12", + "Target SM 1.2">; +def SM13 : SubtargetFeature<"sm_13", "SmVersion", "13", + "Target SM 1.3">; +def SM20 : SubtargetFeature<"sm_20", "SmVersion", "20", + "Target SM 2.0">; +def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21", + "Target SM 2.1">; +def SM30 : SubtargetFeature<"sm_30", "SmVersion", "30", + "Target SM 3.0">; +def SM35 : SubtargetFeature<"sm_35", "SmVersion", "35", + "Target SM 3.5">; + +// PTX Versions +def PTX30 : SubtargetFeature<"ptx30", "PTXVersion", "30", + "Use PTX version 3.0">; +def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31", + "Use PTX version 3.1">; //===----------------------------------------------------------------------===// // NVPTX supported processors. @@ -33,7 +56,14 @@ def FeatureDummy : SubtargetFeature<"dummy", "dummy", "true", "">; class Proc<string Name, list<SubtargetFeature> Features> : Processor<Name, NoItineraries, Features>; -def : Proc<"sm_10", [FeatureDummy]>; +def : Proc<"sm_10", [SM10]>; +def : Proc<"sm_11", [SM11]>; +def : Proc<"sm_12", [SM12]>; +def : Proc<"sm_13", [SM13]>; +def : Proc<"sm_20", [SM20]>; +def : Proc<"sm_21", [SM21]>; +def : Proc<"sm_30", [SM30]>; +def : Proc<"sm_35", [SM35]>; def NVPTXInstrInfo : InstrInfo { diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h index 24b3bd5..c7cabf6 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h @@ -16,7 +16,7 @@ #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" namespace llvm { @@ -31,7 +31,7 @@ public: NVPTXAllocaHoisting() : FunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetData>(); + AU.addRequired<DataLayout>(); AU.addPreserved<MachineFunctionAnalysis>(); } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index f2b9616..0a885ce 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -68,7 +68,54 @@ static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src", cl::location(llvm::InterleaveSrcInPtx)); +namespace { +/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V +/// depends. 
+void DiscoverDependentGlobals(Value *V, + DenseSet<GlobalVariable*> &Globals) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + Globals.insert(GV); + else { + if (User *U = dyn_cast<User>(V)) { + for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { + DiscoverDependentGlobals(U->getOperand(i), Globals); + } + } + } +} +/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable +/// instances to be emitted, but only after any dependents have been added +/// first. +void VisitGlobalVariableForEmission(GlobalVariable *GV, + SmallVectorImpl<GlobalVariable*> &Order, + DenseSet<GlobalVariable*> &Visited, + DenseSet<GlobalVariable*> &Visiting) { + // Have we already visited this one? + if (Visited.count(GV)) return; + + // Do we have a circular dependency? + if (Visiting.count(GV)) + report_fatal_error("Circular dependency found in global variable set"); + + // Start visiting this global + Visiting.insert(GV); + + // Make sure we visit all dependents first + DenseSet<GlobalVariable*> Others; + for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) + DiscoverDependentGlobals(GV->getOperand(i), Others); + + for (DenseSet<GlobalVariable*>::iterator I = Others.begin(), + E = Others.end(); I != E; ++I) + VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); + + // Now we can visit ourself + Order.push_back(GV); + Visited.insert(GV); + Visiting.erase(GV); +} +} // @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we // cannot just link to the existing version. @@ -98,10 +145,10 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { switch (CE->getOpcode()) { default: // If the code isn't optimized, there may be outstanding folding - // opportunities. Attempt to fold the expression using TargetData as a + // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. if (Constant *C = - ConstantFoldConstantExpression(CE, AP.TM.getTargetData())) + ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) if (C != CE) return LowerConstant(C, AP); @@ -115,7 +162,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { - const TargetData &TD = *AP.TM.getTargetData(); + const DataLayout &TD = *AP.TM.getDataLayout(); // Generate a symbolic expression for the byte address const Constant *PtrVal = CE->getOperand(0); SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end()); @@ -145,7 +192,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { return LowerConstant(CE->getOperand(0), AP); case Instruction::IntToPtr: { - const TargetData &TD = *AP.TM.getTargetData(); + const DataLayout &TD = *AP.TM.getDataLayout(); // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); @@ -155,7 +202,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { } case Instruction::PtrToInt: { - const TargetData &TD = *AP.TM.getTargetData(); + const DataLayout &TD = *AP.TM.getDataLayout(); // Support only foldable casts to/from pointers that can be eliminated by // changing the pointer to the appropriately sized integer type. 
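To illustrate the foldable-cast comment above (IR examples assumed, shown for a target with 64-bit pointers): a width-preserving ptrtoint lowers to the symbol's address expression unchanged, while a narrowing one needs that expression truncated to the integer width:

    // ptrtoint (i8* @g to i64)  ->  address of @g, emitted as-is
    // ptrtoint (i8* @g to i32)  ->  address of @g, truncated to 32 bits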
Constant *Op = CE->getOperand(0); @@ -270,7 +317,7 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); const TargetLowering *TLI = TM.getTargetLowering(); Type *Ty = F->getReturnType(); @@ -874,7 +921,7 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) .Initialize(OutContext, TM); - Mang = new Mangler(OutContext, *TM.getTargetData()); + Mang = new Mangler(OutContext, *TM.getDataLayout()); // Emit header before any dwarf directives are emitted below. emitHeader(M, OS1); @@ -893,10 +940,27 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { emitDeclarations(M, OS2); - // Print out module-level global variables here. + // As ptxas does not support forward references of globals, we need to first + // sort the list of module-level globals in def-use order. We visit each + // global variable in order, and ensure that we emit it *after* its dependent + // globals. We use a little extra memory maintaining both a set and a list to + // have fast searches while maintaining a strict ordering. + SmallVector<GlobalVariable*,8> Globals; + DenseSet<GlobalVariable*> GVVisited; + DenseSet<GlobalVariable*> GVVisiting; + + // Visit each global variable, in order for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - printModuleLevelGV(I, OS2); + I != E; ++I) + VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); + + assert(GVVisited.size() == M.getGlobalList().size() && + "Missed a global variable"); + assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); + + // Print out module-level global variables in proper order + for (unsigned i = 0, e = Globals.size(); i != e; ++i) + printModuleLevelGV(Globals[i], OS2); OS2 << '\n'; @@ -910,7 +974,8 @@ void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) { O << "//\n"; O << "\n"; - O << ".version 3.0\n"; + unsigned PTXVersion = nvptxSubtarget.getPTXVersion(); + O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; O << ".target "; O << nvptxSubtarget.getTargetName(); @@ -1023,7 +1088,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, return; } - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); // GlobalVariables are always constant pointers themselves. const PointerType *PTy = GVar->getType(); @@ -1296,7 +1361,7 @@ std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, raw_ostream &O) { - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); // GlobalVariables are always constant pointers themselves. 
const PointerType *PTy = GVar->getType(); @@ -1342,7 +1407,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, static unsigned int -getOpenCLAlignment(const TargetData *TD, +getOpenCLAlignment(const DataLayout *TD, Type *Ty) { if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty)) return TD->getPrefTypeAlignment(Ty); @@ -1421,7 +1486,7 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); const AttrListPtr &PAL = F->getAttributes(); const TargetLowering *TLI = TM.getTargetLowering(); Function::const_arg_iterator I, E; @@ -1456,7 +1521,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, continue; } - if (PAL.paramHasAttr(paramIndex+1, Attribute::ByVal) == false) { + if (PAL.getParamAttributes(paramIndex+1). + hasAttribute(Attributes::ByVal) == false) { // Just a scalar const PointerType *PTy = dyn_cast<PointerType>(Ty); if (isKernelFunc) { @@ -1524,6 +1590,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, // <a> = PAL.getparamalignment // size = typeallocsize of element type unsigned align = PAL.getParamAlignment(paramIndex+1); + if (align == 0) + align = TD->getABITypeAlignment(ETy); + unsigned sz = TD->getTypeAllocSize(ETy); O << "\t.param .align " << align << " .b8 "; @@ -1714,7 +1783,7 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) { void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) { - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); if (isa<UndefValue>(CPV) || CPV->isNullValue()) { int s = TD->getTypeAllocSize(CPV->getType()); @@ -1843,7 +1912,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, AggBuffer *aggBuffer) { - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); int Bytes; // Old constants diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 6ea10ea..f1a99d7 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -174,10 +174,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) setTruncStoreAction(MVT::f64, MVT::f32, Expand); // PTX does not support load / store predicate registers - setOperationAction(ISD::LOAD, MVT::i1, Expand); + setOperationAction(ISD::LOAD, MVT::i1, Custom); + setOperationAction(ISD::STORE, MVT::i1, Custom); + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setOperationAction(ISD::STORE, MVT::i1, Expand); setTruncStoreAction(MVT::i64, MVT::i1, Expand); setTruncStoreAction(MVT::i32, MVT::i1, Expand); setTruncStoreAction(MVT::i16, MVT::i1, Expand); @@ -402,7 +403,7 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, if (isABI) { unsigned align = Outs[i].Flags.getByValAlign(); - unsigned sz = getTargetData()->getTypeAllocSize(ETy); + unsigned sz = getDataLayout()->getTypeAllocSize(ETy); O << ".param .align " << align << " .b8 "; O << "_"; @@ -655,11 +656,11 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, else { if (Func) { // direct call if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment)) - retAlignment = 
getTargetData()->getABITypeAlignment(retTy); + retAlignment = getDataLayout()->getABITypeAlignment(retTy); } else { // indirect call const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction()); if (!llvm::getAlign(*CallI, 0, retAlignment)) - retAlignment = getTargetData()->getABITypeAlignment(retTy); + retAlignment = getDataLayout()->getABITypeAlignment(retTy); } SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment, @@ -856,11 +857,64 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::EXTRACT_SUBVECTOR: return Op; case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); default: llvm_unreachable("Custom lowering not defined for operation"); } } + +// v = ld i1* addr +// => +// v1 = ld i8* addr +// v = trunc v1 to i1 +SDValue NVPTXTargetLowering:: +LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + LoadSDNode *LD = cast<LoadSDNode>(Node); + DebugLoc dl = Node->getDebugLoc(); + assert(LD->getExtensionType() == ISD::NON_EXTLOAD) ; + assert(Node->getValueType(0) == MVT::i1 && + "Custom lowering for i1 load only"); + SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), + LD->getAlignment()); + SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); + // The legalizer (the caller) is expecting two values from the legalized + // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() + // in LegalizeDAG.cpp which also uses MergeValues. + SDValue Ops[] = {result, LD->getChain()}; + return DAG.getMergeValues(Ops, 2, dl); +} + +// st i1 v, addr +// => +// v1 = zxt v to i8 +// st i8, addr +SDValue NVPTXTargetLowering:: +LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + DebugLoc dl = Node->getDebugLoc(); + StoreSDNode *ST = cast<StoreSDNode>(Node); + SDValue Tmp1 = ST->getChain(); + SDValue Tmp2 = ST->getBasePtr(); + SDValue Tmp3 = ST->getValue(); + assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only"); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, + MVT::i8, Tmp3); + SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, + ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); + return Result; +} + + SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx, EVT v) const { @@ -916,7 +970,7 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); - const TargetData *TD = getTargetData(); + const DataLayout *TD = getDataLayout(); const Function *F = MF.getFunction(); const AttrListPtr &PAL = F->getAttributes(); @@ -965,7 +1019,7 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, // to newly created nodes. The SDNOdes for params have to // appear in the same order as their order of appearance // in the original function. "idx+1" holds that order. - if (PAL.paramHasAttr(i+1, Attribute::ByVal) == false) { + if (PAL.getParamAttributes(i+1).hasAttribute(Attributes::ByVal) == false) { // A plain scalar. 
if (isABI || isKernel) { // If ABI, load from the param symbol diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index 86246e6..94a177c 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -138,6 +138,9 @@ private: SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx); SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; }; } // namespace llvm diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 56b2372..9273931 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -21,7 +21,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Support/InstIterator.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" using namespace llvm; @@ -110,7 +110,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { SmallVector<MemTransferInst *, 4> aggrMemcpys; SmallVector<MemSetInst *, 4> aggrMemsets; - TargetData *TD = &getAnalysis<TargetData>(); + DataLayout *TD = &getAnalysis<DataLayout>(); LLVMContext &Context = F.getParent()->getContext(); // diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h index ac7f150..b150c69 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h @@ -17,7 +17,7 @@ #include "llvm/Pass.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" namespace llvm { @@ -28,7 +28,7 @@ struct NVPTXLowerAggrCopies : public FunctionPass { NVPTXLowerAggrCopies() : FunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetData>(); + AU.addRequired<DataLayout>(); AU.addPreserved<MachineFunctionAnalysis>(); } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp index 6aadd43..7b62cce 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -34,16 +34,18 @@ DriverInterface(cl::desc("Choose driver interface:"), NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64Bit) -:NVPTXGenSubtargetInfo(TT, "", FS), // Don't pass CPU to subtarget, - // because we don't register all - // nvptx targets. - Is64Bit(is64Bit) { +: NVPTXGenSubtargetInfo(TT, CPU, FS), + Is64Bit(is64Bit), + PTXVersion(0), + SmVersion(10) { drvInterface = DriverInterface; // Provide the default CPU if none std::string defCPU = "sm_10"; + ParseSubtargetFeatures((CPU.empty() ? defCPU : CPU), FS); + // Get the TargetName from the FS if available if (FS.empty() && CPU.empty()) TargetName = defCPU; @@ -52,6 +54,12 @@ NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, else llvm_unreachable("we are not using FeatureStr"); - // Set up the SmVersion - SmVersion = atoi(TargetName.c_str()+3); + // We default to PTX 3.1, but we cannot just default to it in the initializer + // since the attribute parser checks if the given option is >= the default. + // So if we set ptx31 as the default, the ptx30 attribute would never match. 
+ // Instead, we use 0 as the default and manually set 31 if the default is + // used. + if (PTXVersion == 0) { + PTXVersion = 31; + } } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index 8f2a629..3cfd971 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -25,13 +25,17 @@ namespace llvm { class NVPTXSubtarget : public NVPTXGenSubtargetInfo { - - unsigned int SmVersion; + std::string TargetName; NVPTX::DrvInterface drvInterface; - bool dummy; // For the 'dummy' feature, see NVPTX.td bool Is64Bit; + // PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31 + unsigned PTXVersion; + + // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31 + unsigned int SmVersion; + public: /// This constructor initializes the data members to match that /// of the specified module. @@ -69,6 +73,8 @@ public: NVPTX::DrvInterface getDrvInterface() const { return drvInterface; } std::string getTargetName() const { return TargetName; } + unsigned getPTXVersion() const { return PTXVersion; } + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); std::string getDataLayout() const { diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 433f415..cbb4900 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -32,7 +32,7 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -71,8 +71,9 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, bool is64bit) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, is64bit), - DataLayout(Subtarget.getDataLayout()), - InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit) + DL(Subtarget.getDataLayout()), + InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit), + STTI(&TLInfo), VTTI(&TLInfo) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ { } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h index b3f9cac..11bc9d4 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -21,10 +21,11 @@ #include "NVPTXSubtarget.h" #include "NVPTXFrameLowering.h" #include "ManagedStringPool.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -32,7 +33,7 @@ namespace llvm { /// class NVPTXTargetMachine : public LLVMTargetMachine { NVPTXSubtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment + const DataLayout DL; // Calculates type size & alignment NVPTXInstrInfo InstrInfo; NVPTXTargetLowering TLInfo; TargetSelectionDAGInfo TSInfo; @@ -44,6 +45,9 @@ class NVPTXTargetMachine : public LLVMTargetMachine { // Hold Strings that can be free'd all together with NVPTXTargetMachine ManagedStringPool ManagedStrPool; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; + //bool 
addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level, // bool DisableVerify, MCContext *&OutCtx); @@ -58,7 +62,7 @@ public: return &FrameLowering; } virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const TargetData *getTargetData() const { return &DataLayout;} + virtual const DataLayout *getDataLayout() const { return &DL;} virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget;} virtual const NVPTXRegisterInfo *getRegisterInfo() const { @@ -72,6 +76,12 @@ public: virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } //virtual bool addInstSelector(PassManagerBase &PM, // CodeGenOpt::Level OptLevel); diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index d175e3e..3d58306 100644 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -136,21 +136,21 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - char Value = MI->getOperand(OpNo).getImm(); - Value = (Value << (32-5)) >> (32-5); + int Value = MI->getOperand(OpNo).getImm(); + Value = SignExtend32<5>(Value); O << (int)Value; } void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - unsigned char Value = MI->getOperand(OpNo).getImm(); + unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 31 && "Invalid u5imm argument!"); O << (unsigned int)Value; } void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - unsigned char Value = MI->getOperand(OpNo).getImm(); + unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 63 && "Invalid u6imm argument!"); O << (unsigned int)Value; } diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 1744738..87ecb13 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -29,9 +29,14 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case FK_Data_1: case FK_Data_2: case FK_Data_4: + case FK_Data_8: + case PPC::fixup_ppc_toc: return Value; + case PPC::fixup_ppc_lo14: + case PPC::fixup_ppc_toc16_ds: + return (Value & 0xffff) << 2; case PPC::fixup_ppc_brcond14: - return Value & 0x3ffc; + return Value & 0xfffc; case PPC::fixup_ppc_br24: return Value & 0x3fffffc; #if 0 @@ -41,6 +46,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case PPC::fixup_ppc_ha16: return ((Value >> 16) + ((Value & 0x8000) ? 
1 : 0)) & 0xffff; case PPC::fixup_ppc_lo16: + case PPC::fixup_ppc_toc16: return Value & 0xffff; } } @@ -72,7 +78,10 @@ public: { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_lo16", 16, 16, 0 }, { "fixup_ppc_ha16", 16, 16, 0 }, - { "fixup_ppc_lo14", 16, 14, 0 } + { "fixup_ppc_lo14", 16, 14, 0 }, + { "fixup_ppc_toc", 0, 64, 0 }, + { "fixup_ppc_toc16", 16, 16, 0 }, + { "fixup_ppc_toc16_ds", 16, 14, 0 } }; if (Kind < FirstTargetFixupKind) diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index a197981..dc93f71 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -11,6 +11,8 @@ #include "MCTargetDesc/PPCMCTargetDesc.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" using namespace llvm; @@ -21,9 +23,15 @@ namespace { virtual ~PPCELFObjectWriter(); protected: + virtual unsigned getRelocTypeInner(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const; virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel, bool IsRelocWithSymbol, int64_t Addend) const; + virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const; virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset); }; } @@ -36,11 +44,13 @@ PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) PPCELFObjectWriter::~PPCELFObjectWriter() { } -unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel, - bool IsRelocWithSymbol, - int64_t Addend) const { +unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const +{ + MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? 
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); + // determine the type of the relocation unsigned Type; if (IsPCRel) { @@ -61,17 +71,53 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, Type = ELF::R_PPC_ADDR24; break; case PPC::fixup_ppc_brcond14: - Type = ELF::R_PPC_ADDR14_BRTAKEN; // XXX: or BRNTAKEN?_ + Type = ELF::R_PPC_ADDR14; // XXX: or BRNTAKEN?_ break; case PPC::fixup_ppc_ha16: - Type = ELF::R_PPC_ADDR16_HA; + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_PPC_TPREL16_HA: + Type = ELF::R_PPC_TPREL16_HA; + break; + case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC_ADDR16_HA; + break; + } break; case PPC::fixup_ppc_lo16: - Type = ELF::R_PPC_ADDR16_LO; + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_PPC_TPREL16_LO: + Type = ELF::R_PPC_TPREL16_LO; + break; + case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC_ADDR16_LO; + break; + } break; case PPC::fixup_ppc_lo14: Type = ELF::R_PPC_ADDR14; break; + case PPC::fixup_ppc_toc: + Type = ELF::R_PPC64_TOC; + break; + case PPC::fixup_ppc_toc16: + Type = ELF::R_PPC64_TOC16; + break; + case PPC::fixup_ppc_toc16_ds: + Type = ELF::R_PPC64_TOC16_DS; + break; + case FK_Data_8: + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_PPC_TOC: + Type = ELF::R_PPC64_TOC; + break; + case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC64_ADDR64; + break; + } + break; case FK_Data_4: Type = ELF::R_PPC_ADDR32; break; @@ -83,11 +129,41 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, return Type; } +unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) const { + return getRelocTypeInner(Target, Fixup, IsPCRel); +} + +const MCSymbol *PPCELFObjectWriter::undefinedExplicitRelSym(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + assert(Target.getSymA() && "SymA cannot be 0"); + const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol(); + + unsigned RelocType = getRelocTypeInner(Target, Fixup, IsPCRel); + + // The .odp creation emits a relocation against the symbol ".TOC." which + // creates an R_PPC64_TOC relocation. However, the relocation symbol name + // in the final object creation should be NULL, since the symbol does not + // really exist, it is just the reference to the TOC base for the current + // object file. + bool EmitThisSym = RelocType != ELF::R_PPC64_TOC; + + if (EmitThisSym && !Symbol.isTemporary()) + return &Symbol; + return NULL; +} + void PPCELFObjectWriter:: adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { switch ((unsigned)Fixup.getKind()) { case PPC::fixup_ppc_ha16: case PPC::fixup_ppc_lo16: + case PPC::fixup_ppc_toc16: + case PPC::fixup_ppc_toc16_ds: RelocOffset += 2; break; default: diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index b3c889e..37b265e 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -34,6 +34,16 @@ enum Fixups { /// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs /// like 'std'. fixup_ppc_lo14, + + /// fixup_ppc_toc - Insert value of TOC base (.TOC.). + fixup_ppc_toc, + + /// fixup_ppc_toc16 - A 16-bit signed fixup relative to the TOC base.
+ fixup_ppc_toc16, + + /// fixup_ppc_toc16_ds - A 14-bit signed fixup relative to the TOC base with + /// two implied zero bits. + fixup_ppc_toc16_ds, // Marker LastTargetFixupKind, diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 245b457..215aa40 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -59,12 +59,10 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { HasLEB128 = true; // Target asm supports leb128 directives (little-endian) // Exceptions handling - if (!is64Bit) - ExceptionsType = ExceptionHandling::DwarfCFI; + ExceptionsType = ExceptionHandling::DwarfCFI; ZeroDirective = "\t.space\t"; Data64bitsDirective = is64Bit ? "\t.quad\t" : 0; - LCOMMDirectiveType = LCOMM::NoAlignment; AssemblerDialect = 0; // Old-Style mnemonics. } diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index f652422..2118302 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -15,7 +15,9 @@ #include "MCTargetDesc/PPCBaseInfo.h" #include "MCTargetDesc/PPCFixupKinds.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/ErrorHandling.h" @@ -25,16 +27,28 @@ STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); namespace { class PPCMCCodeEmitter : public MCCodeEmitter { - PPCMCCodeEmitter(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT - + PPCMCCodeEmitter(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION; + void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION; + + const MCSubtargetInfo &STI; + Triple TT; + public: PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, - MCContext &ctx) { + MCContext &ctx) + : STI(sti), TT(STI.getTargetTriple()) { } ~PPCMCCodeEmitter() {} + bool is64BitMode() const { + return (STI.getFeatureBits() & PPC::Feature64Bit) != 0; + } + + bool isSVR4ABI() const { + return TT.isMacOSX() == 0; + } + unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const; unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo, @@ -61,11 +75,19 @@ public: SmallVectorImpl<MCFixup> &Fixups) const; void EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { - unsigned Bits = getBinaryCodeForInstr(MI, Fixups); + uint64_t Bits = getBinaryCodeForInstr(MI, Fixups); + + // BL8_NOP_ELF and BLA8_NOP_ELF are both 8 bytes in size because of the + // following 'nop'. + unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value! + unsigned Opcode = MI.getOpcode(); + if (Opcode == PPC::BL8_NOP_ELF || Opcode == PPC::BLA8_NOP_ELF) + Size = 8; // Output the constant in big endian byte order. - for (unsigned i = 0; i != 4; ++i) { - OS << (char)(Bits >> 24); + int ShiftValue = (Size * 8) - 8; + for (unsigned i = 0; i != Size; ++i) { + OS << (char)(Bits >> ShiftValue); Bits <<= 8; } @@ -140,8 +162,12 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo, return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits; // Add a fixup for the displacement field.
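As an aside, the variable-size emission loop in EncodeInstruction above writes the most significant byte first; a self-contained sketch of the same byte-ordering logic (hypothetical encoding value, illustration only):

    #include <cstdint>
    #include <cstdio>

    // Emit an instruction encoding as `Size` big-endian bytes (most
    // significant byte first), mirroring the loop in EncodeInstruction.
    static void emitBigEndian(uint64_t Bits, unsigned Size) {
      int ShiftValue = (Size * 8) - 8;            // position of the top byte
      for (unsigned i = 0; i != Size; ++i) {
        std::putchar((char)(Bits >> ShiftValue)); // emit the current top byte
        Bits <<= 8;                               // shift the next byte up
      }
    }

    int main() {
      emitBigEndian(0x4800000160000000ULL, 8);    // hypothetical bl + nop pair
      return 0;
    }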
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_lo16)); + if (isSVR4ABI() && is64BitMode()) + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_toc16)); + else + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_lo16)); return RegBits; } @@ -158,8 +184,12 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo, return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits; // Add a fixup for the branch target. - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_lo14)); + if (isSVR4ABI() && is64BitMode()) + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_toc16_ds)); + else + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_lo14)); return RegBits; } @@ -168,7 +198,9 @@ unsigned PPCMCCodeEmitter:: get_crbitm_encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(OpNo); - assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) && + assert((MI.getOpcode() == PPC::MTCRF || + MI.getOpcode() == PPC::MFOCRF || + MI.getOpcode() == PPC::MTCRF8) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); return 0x80 >> getPPCRegisterNumbering(MO.getReg()); } diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 6568e82..4c2578d 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -70,7 +70,7 @@ static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) { // Initial state of the frame pointer is R1. MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(PPC::R1, 0); + MachineLocation Src(isPPC64? 
PPC::X1 : PPC::R1, 0); MAI->addInitialFrameState(0, Dst, Src); return MAI; diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td index b7f1688..cb15dad 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.td +++ b/contrib/llvm/lib/Target/PowerPC/PPC.td @@ -35,6 +35,10 @@ def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">; def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">; def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">; def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">; +def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective", + "PPC::DIR_E500mc", "">; +def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective", + "PPC::DIR_E5500", "">; def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">; def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">; @@ -94,6 +98,12 @@ def : Processor<"g5", G5Itineraries, [Directive970, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; +def : ProcessorModel<"e500mc", PPCE500mcModel, + [DirectiveE500mc, FeatureMFOCRF, + FeatureSTFIWX, FeatureBookE, FeatureISEL]>; +def : ProcessorModel<"e5500", PPCE5500Model, + [DirectiveE5500, FeatureMFOCRF, Feature64Bit, + FeatureSTFIWX, FeatureBookE, FeatureISEL]>; def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, FeatureISEL, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index f76b89c..15d690b 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -54,12 +54,13 @@ #include "llvm/Support/ELF.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/MapVector.h" using namespace llvm; namespace { class PPCAsmPrinter : public AsmPrinter { protected: - DenseMap<MCSymbol*, MCSymbol*> TOC; + MapVector<MCSymbol*, MCSymbol*> TOC; const PPCSubtarget &Subtarget; uint64_t TOCLabelID; public: @@ -109,6 +110,8 @@ namespace { bool doFinalization(Module &M); virtual void EmitFunctionEntryLabel(); + + void EmitFunctionBodyEnd(); }; /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac @@ -282,8 +285,22 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier. + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'y': // A memory reference for an X-form instruction + { + const char *RegName = "r0"; + if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName); + O << RegName << ", "; + printOperand(MI, OpNo, O); + return false; + } + } + } + assert(MI->getOperand(OpNo).isReg()); O << "0("; printOperand(MI, OpNo, O); @@ -345,23 +362,37 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitLabel(PICBase); return; } + case PPC::LDtocJTI: + case PPC::LDtocCPT: case PPC::LDtoc: { // Transform %X3 = LDtoc <ga:@min1>, %X2 LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); - + // Change the opcode to LD, and the global address operand to be a // reference to the TOC entry we will synthesize later. 
TmpInst.setOpcode(PPC::LD); const MachineOperand &MO = MI->getOperand(1); - assert(MO.isGlobal()); - - // Map symbol -> label of TOC entry. - MCSymbol *&TOCEntry = TOC[Mang->getSymbol(MO.getGlobal())]; - if (TOCEntry == 0) - TOCEntry = GetTempSymbol("C", TOCLabelID++); - + + // Map symbol -> label of TOC entry. + assert(MO.isGlobal() || MO.isCPI() || MO.isJTI()); + MCSymbol *MOSymbol = 0; + if (MO.isGlobal()) + MOSymbol = Mang->getSymbol(MO.getGlobal()); + else if (MO.isCPI()) + MOSymbol = GetCPISymbol(MO.getIndex()); + else if (MO.isJTI()) + MOSymbol = GetJTISymbol(MO.getIndex()); + MCSymbol *&TOCEntry = TOC[MOSymbol]; + // To avoid a name clash, check whether the name already exists. + while (TOCEntry == 0) { + if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) + + "C" + Twine(TOCLabelID++)) == 0) { + TOCEntry = GetTempSymbol("C", TOCLabelID); + } + } + const MCExpr *Exp = - MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC, + MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC_ENTRY, OutContext); TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); OutStreamer.EmitInstruction(TmpInst); @@ -404,11 +435,17 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { OutStreamer.EmitValueToAlignment(8); MCSymbol *Symbol1 = OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName())); - MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.@tocbase")); + // Generates an R_PPC64_ADDR64 (from FK_Data_8) relocation for the function + // entry point. OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext), - Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/); - OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, OutContext), - Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/); + 8/*size*/, 0/*addrspace*/); + MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.")); + // Generates an R_PPC64_TOC relocation for TOC base insertion. + OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, + MCSymbolRefExpr::VK_PPC_TOC, OutContext), + 8/*size*/, 0/*addrspace*/); + // Emit a null environment pointer. + OutStreamer.EmitIntValue(0, 8 /* size */, 0 /* addrspace */); OutStreamer.SwitchSection(Current); MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol( @@ -419,7 +456,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { bool PPCLinuxAsmPrinter::doFinalization(Module &M) { - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); bool isPPC64 = TD->getPointerSizeInBits() == 64; @@ -429,18 +466,34 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { SectionKind::getReadOnly()); OutStreamer.SwitchSection(Section); - // FIXME: This is nondeterminstic! - for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(), + for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(), E = TOC.end(); I != E; ++I) { OutStreamer.EmitLabel(I->second); - OutStreamer.EmitRawText("\t.tc " + Twine(I->first->getName()) + - "[TC]," + I->first->getName()); + MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName()); + OutStreamer.EmitTCEntry(*S); } } return AsmPrinter::doFinalization(M); } +/// EmitFunctionBodyEnd - Print the traceback table before the .size +/// directive. +/// +void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() { + // Only the 64-bit target requires a traceback table. For now, + // we only emit the word of zeroes that GDB requires to find + // the end of the function, and zeroes for the eight-byte + // mandatory fields.
+ // FIXME: We should fill in the eight-byte mandatory fields as described in + // the PPC64 ELF ABI (this is a low-priority item because GDB does not + // currently make use of these fields). + if (Subtarget.isPPC64()) { + OutStreamer.EmitIntValue(0, 4/*size*/); + OutStreamer.EmitIntValue(0, 8/*size*/); + } +} + void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { static const char *const CPUDirectives[] = { "", @@ -453,6 +506,8 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { "ppc750", "ppc970", "ppcA2", + "ppce500mc", + "ppce5500", "power6", "power7", "ppc64" @@ -508,7 +563,7 @@ static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) { void PPCDarwinAsmPrinter:: EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { - bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64; + bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64; const TargetLoweringObjectFileMachO &TLOFMacho = static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); @@ -603,7 +658,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { bool PPCDarwinAsmPrinter::doFinalization(Module &M) { - bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64; + bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64; // Darwin/PPC always uses mach-o. const TargetLoweringObjectFileMachO &TLOFMacho = diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td index b2b5364..3f87e88 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -12,12 +12,19 @@ // //===----------------------------------------------------------------------===// +/// CCIfSubtarget - Match if the current subtarget has a feature F. +class CCIfSubtarget<string F, CCAction A> + : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>; + //===----------------------------------------------------------------------===// // Return Value Calling Convention //===----------------------------------------------------------------------===// // Return-value convention for PowerPC def RetCC_PPC : CallingConv<[ + // On PPC64, integer return values are always promoted to i64 + CCIfType<[i32], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>, + CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index c24afa9..caf7bf2 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -13,6 +13,7 @@ #include "PPCFrameLowering.h" #include "PPCInstrInfo.h" +#include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -49,6 +50,11 @@ static const uint16_t VRRegNo[] = { /// to manipulate the VRSAVE register, even though it uses vector registers. /// This can happen when the only registers used are known to be live in or out /// of the function. Remove all of the VRSAVE related code from the function. +/// FIXME: The removal of the code results in a compile failure at -O0 when the +/// function contains a function call, as the GPR containing original VRSAVE +/// contents is spilled and reloaded around the call. Without the prolog code, +/// the spill instruction refers to an undefined register. 
This code needs +/// to account for all uses of that GPR. static void RemoveVRSaveCode(MachineInstr *MI) { MachineBasicBlock *Entry = MI->getParent(); MachineFunction *MF = Entry->getParent(); @@ -168,6 +174,11 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { MI->eraseFromParent(); } +static bool spillsCR(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->isCRSpilled(); +} + /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { @@ -184,13 +195,22 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { // If we are a leaf function, and use up to 224 bytes of stack space, // don't have a frame pointer, calls, or dynamic alloca then we do not need - // to adjust the stack pointer (we fit in the Red Zone). - bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone); - // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. + // to adjust the stack pointer (we fit in the Red Zone). For 64-bit + // SVR4, we also require a stack frame if we need to spill the CR, + // since this spill area is addressed relative to the stack pointer. + bool DisableRedZone = MF.getFunction()->getFnAttributes(). + hasAttribute(Attributes::NoRedZone); + // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can + // still generate stackless code if all local vars are reg-allocated. + // Try: (FrameSize <= 224 + // || (FrameSize == 0 && Subtarget.isPPC32 && Subtarget.isSVR4ABI())) if (!DisableRedZone && FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. + !(Subtarget.isPPC64() && // No 64-bit SVR4 CRsave. + Subtarget.isSVR4ABI() + && spillsCR(MF)) && (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. // No need for frame MFI->setStackSize(0); @@ -241,7 +261,7 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { // Naked functions have no stack frame pushed, so we don't have a frame // pointer. - if (MF.getFunction()->hasFnAttr(Attribute::Naked)) + if (MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked)) return false; return MF.getTarget().Options.DisableFramePointerElim(MF) || @@ -268,12 +288,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, // process it. - for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { - if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { - HandleVRSaveUpdate(MBBI, TII); - break; + if (!Subtarget.isSVR4ABI()) + for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { + if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { + HandleVRSaveUpdate(MBBI, TII); + break; + } } - } // Move MBBI back to the beginning of the function. MBBI = MBB.begin(); @@ -488,7 +509,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // Add callee saved registers to move list. 
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); unsigned Reg = CSI[I].getReg(); if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; @@ -497,6 +517,25 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { if (PPC::CRBITRCRegClass.contains(Reg)) continue; + // For SVR4, don't emit a move for the CR spill slot if we haven't + // spilled CRs. + if (Subtarget.isSVR4ABI() + && (PPC::CR2 <= Reg && Reg <= PPC::CR4) + && !spillsCR(MF)) + continue; + + // For 64-bit SVR4 when we have spilled CRs, the spill location + // is SP+8, not a frame-relative slot. + if (Subtarget.isSVR4ABI() + && Subtarget.isPPC64() + && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { + MachineLocation CSDst(PPC::X1, 8); + MachineLocation CSSrc(PPC::CR2); + Moves.push_back(MachineMove(Label, CSDst, CSSrc)); + continue; + } + + int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); MachineLocation CSDst(MachineLocation::VirtualFP, Offset); MachineLocation CSSrc(Reg); Moves.push_back(MachineMove(Label, CSDst, CSSrc)); @@ -714,11 +753,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, } } -static bool spillsCR(const MachineFunction &MF) { - const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); - return FuncInfo->isCRSpilled(); -} - /// MustSaveLR - Return true if this function requires that we save the LR /// register onto the stack in the prolog and restore it in the epilog of the /// function. @@ -808,7 +842,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) bool HasGPSaveArea = false; bool HasG8SaveArea = false; bool HasFPSaveArea = false; - bool HasCRSaveArea = false; bool HasVRSAVESaveArea = false; bool HasVRSaveArea = false; @@ -843,10 +876,9 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) if (Reg < MinFPR) { MinFPR = Reg; } -// FIXME SVR4: Disable CR save area for now. } else if (PPC::CRBITRCRegClass.contains(Reg) || PPC::CRRCRegClass.contains(Reg)) { -// HasCRSaveArea = true; + ; // do nothing, as we already know whether CRs are spilled } else if (PPC::VRSAVERCRegClass.contains(Reg)) { HasVRSAVESaveArea = true; } else if (PPC::VRRCRegClass.contains(Reg)) { @@ -926,16 +958,21 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) } } - // The CR save area is below the general register save area. - if (HasCRSaveArea) { - // FIXME SVR4: Is it actually possible to have multiple elements in CSI - // which have the CR/CRBIT register class? + // For 32-bit only, the CR save area is below the general register + // save area. For 64-bit SVR4, the CR save area is addressed relative + // to the stack pointer and hence does not need an adjustment here. + // Only CR2 (the first nonvolatile spilled) has an associated frame + // index so that we have a single uniform save area. + if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) { // Adjust the frame index of the CR spill slot. for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - if (PPC::CRBITRCRegClass.contains(Reg) || - PPC::CRRCRegClass.contains(Reg)) { + if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2) + // Leave Darwin logic as-is. 
+ || (!Subtarget.isSVR4ABI() && + (PPC::CRBITRCRegClass.contains(Reg) || + PPC::CRRCRegClass.contains(Reg)))) { int FI = CSI[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); @@ -973,3 +1010,184 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) } } } + +bool +PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + + // Currently, this function only handles SVR4 32- and 64-bit ABIs. + // Return false otherwise to maintain pre-existing behavior. + if (!Subtarget.isSVR4ABI()) + return false; + + MachineFunction *MF = MBB.getParent(); + const PPCInstrInfo &TII = + *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo()); + DebugLoc DL; + bool CRSpilled = false; + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + // CR2 through CR4 are the nonvolatile CR fields. + bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; + + if (CRSpilled && IsCRField) + continue; + + // Add the callee-saved register as live-in; it's killed at the spill. + MBB.addLiveIn(Reg); + + // Insert the spill to the stack frame. + if (IsCRField) { + CRSpilled = true; + // The first time we see a CR field, store the whole CR into the + // save slot via GPR12 (available in the prolog for 32- and 64-bit). + if (Subtarget.isPPC64()) { + // 64-bit: SP+8 + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::X12)); + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW)) + .addReg(PPC::X12, + getKillRegState(true)) + .addImm(8) + .addReg(PPC::X1)); + } else { + // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have + // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)); + MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) + .addReg(PPC::R12, + getKillRegState(true)), + CSI[i].getFrameIdx())); + } + + // Record that we spill the CR in this function. 
+ PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); + FuncInfo->setSpillsCR(); + } else { + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, true, + CSI[i].getFrameIdx(), RC, TRI); + } + } + return true; +} + +static void +restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) { + + MachineFunction *MF = MBB.getParent(); + const PPCInstrInfo &TII = + *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo()); + DebugLoc DL; + unsigned RestoreOp, MoveReg; + + if (isPPC64) { + // 64-bit: SP+8 + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ), PPC::X12) + .addImm(8) + .addReg(PPC::X1)); + RestoreOp = PPC::MTCRF8; + MoveReg = PPC::X12; + } else { + // 32-bit: FP-relative + MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), + PPC::R12), + CSI[CSIIndex].getFrameIdx())); + RestoreOp = PPC::MTCRF; + MoveReg = PPC::R12; + } + + if (CR2Spilled) + MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) + .addReg(MoveReg)); + + if (CR3Spilled) + MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) + .addReg(MoveReg)); + + if (CR4Spilled) + MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) + .addReg(MoveReg)); +} + +bool +PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + + // Currently, this function only handles SVR4 32- and 64-bit ABIs. + // Return false otherwise to maintain pre-existing behavior. + if (!Subtarget.isSVR4ABI()) + return false; + + MachineFunction *MF = MBB.getParent(); + const PPCInstrInfo &TII = + *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo()); + bool CR2Spilled = false; + bool CR3Spilled = false; + bool CR4Spilled = false; + unsigned CSIIndex = 0; + + // Initialize insertion-point logic; we will be restoring in reverse + // order of spill. + MachineBasicBlock::iterator I = MI, BeforeI = I; + bool AtStart = I == MBB.begin(); + + if (!AtStart) + --BeforeI; + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + + if (Reg == PPC::CR2) { + CR2Spilled = true; + // The spill slot is associated only with CR2, which is the + // first of the nonvolatile CR fields to be spilled. Save its index here. + CSIIndex = i; + continue; + } else if (Reg == PPC::CR3) { + CR3Spilled = true; + continue; + } else if (Reg == PPC::CR4) { + CR4Spilled = true; + continue; + } else { + // When we first encounter a non-CR register after seeing at + // least one CR register, restore all spilled CRs together. + if ((CR2Spilled || CR3Spilled || CR4Spilled) + && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) { + restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled, + MBB, I, CSI, CSIIndex); + CR2Spilled = CR3Spilled = CR4Spilled = false; + } + + // Default behavior for non-CR saves. + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), + RC, TRI); + assert(I != MBB.begin() && + "loadRegFromStackSlot didn't insert any code!"); + } + + // Insert in reverse order. + if (AtStart) + I = MBB.begin(); + else { + I = BeforeI; + ++I; + } + } + + // If we haven't yet restored the CRs, do so now.
+ if (CR2Spilled || CR3Spilled || CR4Spilled) + restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled, + MBB, I, CSI, CSIIndex); + + return true; +} + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h index d708541..4d957b9 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h @@ -45,6 +45,16 @@ public: RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + /// targetHandlesStackFrameRounding - Returns true if the target is /// responsible for rounding up the stack frame (probably at emitPrologue /// time). @@ -170,23 +180,11 @@ public: {PPC::R15, -68}, {PPC::R14, -72}, - // CR save area offset. - // FIXME SVR4: Disable CR save area for now. -// {PPC::CR2, -4}, -// {PPC::CR3, -4}, -// {PPC::CR4, -4}, -// {PPC::CR2LT, -4}, -// {PPC::CR2GT, -4}, -// {PPC::CR2EQ, -4}, -// {PPC::CR2UN, -4}, -// {PPC::CR3LT, -4}, -// {PPC::CR3GT, -4}, -// {PPC::CR3EQ, -4}, -// {PPC::CR3UN, -4}, -// {PPC::CR4LT, -4}, -// {PPC::CR4GT, -4}, -// {PPC::CR4EQ, -4}, -// {PPC::CR4UN, -4}, + // CR save area offset. We map each of the nonvolatile CR fields + // to the slot for CR2, which is the first of the nonvolatile CR + // fields to be assigned, so that we only allocate one save slot. + // See PPCRegisterInfo::hasReservedSpillSlot() for more information. + {PPC::CR2, -4}, // VRSAVE save area offset. {PPC::VRSAVE, -4}, @@ -228,27 +226,6 @@ public: {PPC::F14, -144}, // General register save area offsets. - // FIXME 64-bit SVR4: Are 32-bit registers actually allocated in 64-bit - // mode? - {PPC::R31, -4}, - {PPC::R30, -12}, - {PPC::R29, -20}, - {PPC::R28, -28}, - {PPC::R27, -36}, - {PPC::R26, -44}, - {PPC::R25, -52}, - {PPC::R24, -60}, - {PPC::R23, -68}, - {PPC::R22, -76}, - {PPC::R21, -84}, - {PPC::R20, -92}, - {PPC::R19, -100}, - {PPC::R18, -108}, - {PPC::R17, -116}, - {PPC::R16, -124}, - {PPC::R15, -132}, - {PPC::R14, -140}, - {PPC::X31, -8}, {PPC::X30, -16}, {PPC::X29, -24}, @@ -268,24 +245,6 @@ public: {PPC::X15, -136}, {PPC::X14, -144}, - // CR save area offset. - // FIXME SVR4: Disable CR save area for now. -// {PPC::CR2, -4}, -// {PPC::CR3, -4}, -// {PPC::CR4, -4}, -// {PPC::CR2LT, -4}, -// {PPC::CR2GT, -4}, -// {PPC::CR2EQ, -4}, -// {PPC::CR2UN, -4}, -// {PPC::CR3LT, -4}, -// {PPC::CR3GT, -4}, -// {PPC::CR3EQ, -4}, -// {PPC::CR3UN, -4}, -// {PPC::CR4LT, -4}, -// {PPC::CR4GT, -4}, -// {PPC::CR4EQ, -4}, -// {PPC::CR4UN, -4}, - // VRSAVE save area offset. 
{PPC::VRSAVE, -4}, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index a00f686..254fea6 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -53,7 +53,9 @@ namespace { GlobalBaseReg = 0; SelectionDAGISel::runOnMachineFunction(MF); - InsertVRSaveCode(MF); + if (!PPCSubTarget.isSVR4ABI()) + InsertVRSaveCode(MF); + return true; } @@ -621,6 +623,88 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) { } } +// getVCmpInst: return the vector compare instruction for the specified +// vector type and condition code. Since this is for Altivec-specific code, +// only the Altivec types (v16i8, v8i16, v4i32, and v4f32) are supported. +static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) { + switch (CC) { + case ISD::SETEQ: + case ISD::SETUEQ: + case ISD::SETNE: + case ISD::SETUNE: + if (VecVT == MVT::v16i8) + return PPC::VCMPEQUB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPEQUH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPEQUW; + // v4f32 != v4f32 can be translated to an unordered not-equal + else if (VecVT == MVT::v4f32) + return PPC::VCMPEQFP; + break; + case ISD::SETLT: + case ISD::SETGT: + case ISD::SETLE: + case ISD::SETGE: + if (VecVT == MVT::v16i8) + return PPC::VCMPGTSB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPGTSH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPGTSW; + else if (VecVT == MVT::v4f32) + return PPC::VCMPGTFP; + break; + case ISD::SETULT: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULE: + if (VecVT == MVT::v16i8) + return PPC::VCMPGTUB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPGTUH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPGTUW; + break; + case ISD::SETOEQ: + if (VecVT == MVT::v4f32) + return PPC::VCMPEQFP; + break; + case ISD::SETOLT: + case ISD::SETOGT: + case ISD::SETOLE: + if (VecVT == MVT::v4f32) + return PPC::VCMPGTFP; + break; + case ISD::SETOGE: + if (VecVT == MVT::v4f32) + return PPC::VCMPGEFP; + break; + default: + break; + } + llvm_unreachable("Invalid integer vector compare condition"); +} + +// getVCmpEQInst: return the equal compare instruction for the specified vector +// type. Since this is for Altivec-specific code, only the Altivec +// types (v16i8, v8i16, v4i32, and v4f32) are supported. +static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) { + switch (VecVT) { + case MVT::v16i8: + return PPC::VCMPEQUB; + case MVT::v8i16: + return PPC::VCMPEQUH; + case MVT::v4i32: + return PPC::VCMPEQUW; + case MVT::v4f32: + return PPC::VCMPEQFP; + default: + llvm_unreachable("Invalid integer vector compare condition"); + } +} + + SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { DebugLoc dl = N->getDebugLoc(); unsigned Imm; @@ -701,10 +785,67 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { } } + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + // Altivec vector compare instructions do not set any CR register by default, and + // vector compare operations return the same type as the operands.
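Lane-wise, the compositions used in the SETCC lowering below follow the usual Altivec pattern: each compare yields an all-ones or all-zeros mask per lane, so a >= b can be formed as (a > b) | (a == b). A one-lane sketch (illustrative values only):

    #include <cassert>
    #include <cstdint>

    // One 32-bit lane of a hypothetical v4i32 signed compare: SETGE is
    // composed as (a > b) | (a == b), matching vcmpgtsw / vcmpequw / vor.
    int main() {
      int32_t a = 7, b = 7;
      uint32_t gt = (a > b)  ? 0xFFFFFFFFu : 0;   // vcmpgtsw lane result
      uint32_t eq = (a == b) ? 0xFFFFFFFFu : 0;   // vcmpequw lane result
      uint32_t ge = gt | eq;                      // vor
      assert(ge == 0xFFFFFFFFu);
      return 0;
    }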
+ if (LHS.getValueType().isVector()) { + EVT VecVT = LHS.getValueType(); + MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy; + unsigned int VCmpInst = getVCmpInst(VT, CC); + + switch (CC) { + case ISD::SETEQ: + case ISD::SETOEQ: + case ISD::SETUEQ: + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); + case ISD::SETNE: + case ISD::SETONE: + case ISD::SETUNE: { + SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); + return CurDAG->SelectNodeTo(N, PPC::VNOR, VecVT, VCmp, VCmp); + } + case ISD::SETLT: + case ISD::SETOLT: + case ISD::SETULT: + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, RHS, LHS); + case ISD::SETGT: + case ISD::SETOGT: + case ISD::SETUGT: + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); + case ISD::SETGE: + case ISD::SETOGE: + case ISD::SETUGE: { + // Small optimization: Altivec provides a 'Vector Compare Greater Than + // or Equal To' instruction (vcmpgefp), so in this case there is no + // need for extra logic for the equal compare. + if (VecVT.getSimpleVT().isFloatingPoint()) { + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); + } else { + SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); + unsigned int VCmpEQInst = getVCmpEQInst(VT); + SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); + return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpGT, VCmpEQ); + } + } + case ISD::SETLE: + case ISD::SETOLE: + case ISD::SETULE: { + SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0); + unsigned int VCmpEQInst = getVCmpEQInst(VT); + SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); + return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpLE, VCmpEQ); + } + default: + llvm_unreachable("Invalid vector compare type: should be expanded by legalize"); + } + } + bool Inv; int OtherCondIdx; unsigned Idx = getCRIdxForSetCC(CC, Inv, OtherCondIdx); - SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl); + SDValue CCReg = SelectCC(LHS, RHS, CC, dl); SDValue IntCR; // Force the ccreg into CR7. @@ -717,7 +858,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { if (PPCSubTarget.hasMFOCRF() && OtherCondIdx == -1) IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, CCReg), 0); - else + else IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32, CR7Reg, CCReg), 0); @@ -975,6 +1116,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { case ISD::AND: { unsigned Imm, Imm2, SH, MB, ME; + uint64_t Imm64; // If this is an and of a value rotated between 0 and 31 bits and then and'd // with a mask, emit rlwinm @@ -993,6 +1135,14 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) }; return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); } + // If this is a 64-bit zero-extension mask, emit rldicl. + if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && + isMask_64(Imm64)) { + SDValue Val = N->getOperand(0); + MB = 64 - CountTrailingOnes_64(Imm64); + SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB) }; + return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops, 3); + } // AND X, 0 -> 0, not "rlwinm 32". 
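The 64-bit zero-extension case selected above (rldicl) depends on two bit tricks: isMask_64 recognizes values of the form 0...01...1, and 64 minus the count of trailing ones gives the mask-begin field MB. A standalone sketch with re-implemented helpers (LLVM's own versions live in Support/MathExtras.h):

    #include <cassert>
    #include <cstdint>

    // Re-implementations of the helpers, for illustration only.
    static bool isMask64(uint64_t v) {            // true for 0...01...1
      return v && ((v + 1) & v) == 0;
    }
    static unsigned countTrailingOnes64(uint64_t v) {
      unsigned n = 0;
      while (v & 1) { v >>= 1; ++n; }
      return n;
    }

    int main() {
      uint64_t Imm64 = 0xFFFFFFFFULL;             // i32 zero-extension mask
      assert(isMask64(Imm64));
      unsigned MB = 64 - countTrailingOnes64(Imm64);
      // rldicl with SH=0 and MB=32 clears the 32 high-order bits,
      // i.e. it zero-extends the low word.
      assert(MB == 32);
      return 0;
    }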
if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) { ReplaceUses(SDValue(N, 0), N->getOperand(1)); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 61d44c5..adf78d5 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -361,6 +361,22 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); + + for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) { + MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j; + setTruncStoreAction(VT, InnerVT, Expand); + } + setLoadExtAction(ISD::SEXTLOAD, VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, Expand); + } + + for (unsigned i = (unsigned)MVT::FIRST_FP_VECTOR_VALUETYPE; + i <= (unsigned)MVT::LAST_FP_VECTOR_VALUETYPE; ++i) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)i; + setOperationAction(ISD::FSQRT, VT, Expand); } // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle @@ -373,6 +389,10 @@ setOperationAction(ISD::LOAD , MVT::v4i32, Legal); setOperationAction(ISD::SELECT, MVT::v4i32, Expand); setOperationAction(ISD::STORE , MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); @@ -392,6 +412,14 @@ setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); + + // Altivec does not contain unordered floating-point compare instructions. + setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand); } if (Subtarget->has64BitSupport()) { @@ -449,6 +477,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setSchedulingPreference(Sched::Hybrid); computeRegisterProperties(); + + // The Freescale cores do better with aggressive inlining of memcpy and + // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
+ if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc || + Subtarget->getDarwinDirective() == PPC::DIR_E5500) { + maxStoresPerMemset = 32; + maxStoresPerMemsetOptSize = 16; + maxStoresPerMemcpy = 32; + maxStoresPerMemcpyOptSize = 8; + maxStoresPerMemmove = 32; + maxStoresPerMemmoveOptSize = 8; + + setPrefFunctionAlignment(4); + benefitFromCodePlacementOpt = true; + } } /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate @@ -517,11 +560,15 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; case PPCISD::MTFSF: return "PPCISD::MTFSF"; case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; + case PPCISD::CR6SET: return "PPCISD::CR6SET"; + case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; } } EVT PPCTargetLowering::getSetCCResultType(EVT VT) const { - return MVT::i32; + if (!VT.isVector()) + return MVT::i32; + return VT.changeVectorElementTypeToInteger(); } //===----------------------------------------------------------------------===// @@ -811,14 +858,13 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { } // Properly sign extend the value. - int ShAmt = (4-ByteSize)*8; - int MaskVal = ((int)Value << ShAmt) >> ShAmt; + int MaskVal = SignExtend32(Value, ByteSize * 8); // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. if (MaskVal == 0) return SDValue(); // Finally, if this value fits in a 5 bit sext field, return it - if (((MaskVal << (32-5)) >> (32-5)) == MaskVal) + if (SignExtend32<5>(MaskVal) == MaskVal) return DAG.getTargetConstant(MaskVal, MVT::i32); return SDValue(); } @@ -1204,6 +1250,14 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); const Constant *C = CP->getConstVal(); + // 64-bit SVR4 ABI code is always position-independent. + // The actual address of the GlobalValue is stored in the TOC. + if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { + SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); + return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA, + DAG.getRegister(PPC::X2, MVT::i64)); + } + unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); SDValue CPIHi = @@ -1217,6 +1271,14 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + // 64-bit SVR4 ABI code is always position-independent. + // The actual address of the GlobalValue is stored in the TOC. 
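As an aside on the SignExtend32 helper adopted in get_VSPLTI_elt above: it names the field width explicitly rather than encoding it in a shift pair. A minimal re-implementation for illustration only (the real helper is llvm::SignExtend32 in Support/MathExtras.h):

    #include <cassert>
    #include <cstdint>

    // Sign-extend the low B bits of x, like llvm::SignExtend32<B>.
    template <unsigned B> int32_t signExtend32(uint32_t x) {
      return int32_t(x << (32 - B)) >> (32 - B);
    }

    int main() {
      // A 5-bit field holding 0x1F is the signed value -1.
      assert(signExtend32<5>(0x1F) == -1);
      // 0x0F stays positive: the sign bit of the 5-bit field is clear.
      assert(signExtend32<5>(0x0F) == 15);
      return 0;
    }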
+ if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { + SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); + return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA, + DAG.getRegister(PPC::X2, MVT::i64)); + } + unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); @@ -1232,8 +1294,8 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); - SDValue TgtBAHi = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOHiFlag); - SDValue TgtBALo = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOLoFlag); + SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); + SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); } @@ -1441,7 +1503,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, MachinePointerInfo(), MVT::i32, false, false, 0); - return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), + return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, false, 0); } @@ -1461,7 +1523,7 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = (PtrVT == MVT::i64); Type *IntPtrTy = - DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType( + DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType( *DAG.getContext()); TargetLowering::ArgListTy Args; @@ -1684,9 +1746,13 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) { - return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins, - dl, DAG, InVals); + if (PPCSubTarget.isSVR4ABI()) { + if (PPCSubTarget.isPPC64()) + return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, + dl, DAG, InVals); + else + return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, + dl, DAG, InVals); } else { return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); @@ -1694,7 +1760,7 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain, } SDValue -PPCTargetLowering::LowerFormalArguments_SVR4( +PPCTargetLowering::LowerFormalArguments_32SVR4( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> @@ -1911,6 +1977,334 @@ PPCTargetLowering::LowerFormalArguments_SVR4( return Chain; } +// PPC64 passes i8, i16, and i32 values in i64 registers. Promote +// value to MVT::i64 and then truncate to the correct register size. +SDValue +PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, + SelectionDAG &DAG, SDValue ArgVal, + DebugLoc dl) const { + if (Flags.isSExt()) + ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, + DAG.getValueType(ObjectVT)); + else if (Flags.isZExt()) + ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, + DAG.getValueType(ObjectVT)); + + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); +} + +// Set the size that is at least reserved in caller of this function. Tail +// call optimized functions' reserved stack space needs to be aligned so that +// taking the difference between two stack areas will result in an aligned +// stack. 
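setMinReservedArea below rounds twice with the same idiom: up to a 16-byte multiple for trailing Altivec parameters, then up to the target stack alignment via a mask. A sketch of the round-up arithmetic (hypothetical sizes; the alignment must be a power of two):

    #include <cassert>

    // Round Size up to the next multiple of Align (a power of two).
    static unsigned roundUp(unsigned Size, unsigned Align) {
      unsigned AlignMask = Align - 1;
      return (Size + AlignMask) & ~AlignMask;
    }

    int main() {
      assert(roundUp(100, 16) == 112);  // next multiple of 16
      assert(roundUp(112, 16) == 112);  // aligned values are unchanged
      return 0;
    }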
+void +PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG, + unsigned nAltivecParamsAtEnd, + unsigned MinReservedArea, + bool isPPC64) const { + PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + // Add the Altivec parameters at the end, if needed. + if (nAltivecParamsAtEnd) { + MinReservedArea = ((MinReservedArea+15)/16)*16; + MinReservedArea += 16*nAltivecParamsAtEnd; + } + MinReservedArea = + std::max(MinReservedArea, + PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); + unsigned TargetAlign + = DAG.getMachineFunction().getTarget().getFrameLowering()-> + getStackAlignment(); + unsigned AlignMask = TargetAlign-1; + MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; + FI->setMinReservedArea(MinReservedArea); +} + +SDValue +PPCTargetLowering::LowerFormalArguments_64SVR4( + SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> + &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + // TODO: add description of PPC stack frame format, or at least some docs. + // + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + // Potential tail calls could cause overwriting of argument stack slots. + bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && + (CallConv == CallingConv::Fast)); + unsigned PtrByteSize = 8; + + unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true); + // Area that is at least reserved in caller of this function. + unsigned MinReservedArea = ArgOffset; + + static const uint16_t GPR[] = { + PPC::X3, PPC::X4, PPC::X5, PPC::X6, + PPC::X7, PPC::X8, PPC::X9, PPC::X10, + }; + + static const uint16_t *FPR = GetFPR(); + + static const uint16_t VR[] = { + PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, + PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 + }; + + const unsigned Num_GPR_Regs = array_lengthof(GPR); + const unsigned Num_FPR_Regs = 13; + const unsigned Num_VR_Regs = array_lengthof(VR); + + unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; + + // Add DAG nodes to load the arguments or copy them out of registers. On + // entry to a function on PPC, the arguments start after the linkage area, + // although the first ones are often in registers. + + SmallVector<SDValue, 8> MemOps; + unsigned nAltivecParamsAtEnd = 0; + Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); + for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) { + SDValue ArgVal; + bool needsLoad = false; + EVT ObjectVT = Ins[ArgNo].VT; + unsigned ObjSize = ObjectVT.getSizeInBits()/8; + unsigned ArgSize = ObjSize; + ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; + + unsigned CurArgOffset = ArgOffset; + + // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. + if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || + ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { + if (isVarArg) { + MinReservedArea = ((MinReservedArea+15)/16)*16; + MinReservedArea += CalculateStackSlotSize(ObjectVT, + Flags, + PtrByteSize); + } else + nAltivecParamsAtEnd++; + } else + // Calculate min reserved area. + MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, + Flags, + PtrByteSize); + + // FIXME the codegen can be much improved in some cases. + // We do not have to keep everything in memory. 
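For orientation, getLinkageSize(true, true) above is the 64-bit SVR4 linkage area of six doublewords (back chain, CR save, LR save, two reserved words, TOC save), so the first parameter-save slot sits at SP+48. A sketch of the offsets the loop below walks, for a hypothetical two-argument signature (illustrative only):

    #include <cassert>

    int main() {
      // Hypothetical call long f(long a, long b) under 64-bit SVR4.
      unsigned PtrByteSize = 8;
      unsigned LinkageSize = 6 * PtrByteSize;  // six reserved doublewords
      unsigned ArgOffset = LinkageSize;        // first slot is at SP+48

      unsigned aOffset = ArgOffset; ArgOffset += PtrByteSize; // a: SP+48 (r3)
      unsigned bOffset = ArgOffset; ArgOffset += PtrByteSize; // b: SP+56 (r4)

      assert(aOffset == 48 && bOffset == 56);
      return 0;
    }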
+ if (Flags.isByVal()) { + // ObjSize is the true size, ArgSize rounded up to multiple of registers. + ObjSize = Flags.getByValSize(); + ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; + // Empty aggregate parameters do not take up registers. Examples: + // struct { } a; + // union { } b; + // int c[0]; + // etc. However, we have to provide a place-holder in InVals, so + // pretend we have an 8-byte item at the current address for that + // purpose. + if (!ObjSize) { + int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + InVals.push_back(FIN); + continue; + } + // All aggregates smaller than 8 bytes must be passed right-justified. + if (ObjSize < PtrByteSize) + CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize); + // The value of the object is its address. + int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + InVals.push_back(FIN); + + if (ObjSize < 8) { + if (GPR_idx != Num_GPR_Regs) { + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); + SDValue Store; + + if (ObjSize==1 || ObjSize==2 || ObjSize==4) { + EVT ObjType = (ObjSize == 1 ? MVT::i8 : + (ObjSize == 2 ? MVT::i16 : MVT::i32)); + Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(FuncArg, CurArgOffset), + ObjType, false, false, 0); + } else { + // For sizes that don't fit a truncating store (3, 5, 6, 7), + // store the whole register as-is to the parameter save area + // slot. The address of the parameter was already calculated + // above (InVals.push_back(FIN)) to be the right-justified + // offset within the slot. For this store, we need a new + // frame index that points at the beginning of the slot. + int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(FuncArg, ArgOffset), + false, false, 0); + } + + MemOps.push_back(Store); + ++GPR_idx; + } + // Whether we copied from a register or not, advance the offset + // into the parameter save area by a full doubleword. + ArgOffset += PtrByteSize; + continue; + } + + for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { + // Store whatever pieces of the object are in registers + // to memory. ArgOffset will be the address of the beginning + // of the object. + if (GPR_idx != Num_GPR_Regs) { + unsigned VReg; + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(FuncArg, ArgOffset), + false, false, 0); + MemOps.push_back(Store); + ++GPR_idx; + ArgOffset += PtrByteSize; + } else { + ArgOffset += ArgSize - j; + break; + } + } + continue; + } + + switch (ObjectVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unhandled argument type!"); + case MVT::i32: + case MVT::i64: + if (GPR_idx != Num_GPR_Regs) { + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); + + if (ObjectVT == MVT::i32) + // PPC64 passes i8, i16, and i32 values in i64 registers. Promote + // value to MVT::i64 and then truncate to the correct register size. 
+ ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
+
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ ArgOffset += 8;
+ break;
+
+ case MVT::f32:
+ case MVT::f64:
+ // Every 8 bytes of argument space consumes one of the GPRs available for
+ // argument passing.
+ if (GPR_idx != Num_GPR_Regs) {
+ ++GPR_idx;
+ }
+ if (FPR_idx != Num_FPR_Regs) {
+ unsigned VReg;
+
+ if (ObjectVT == MVT::f32)
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
+ else
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++FPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+
+ ArgOffset += 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Note that vector arguments in registers don't reserve stack space,
+ // except in varargs functions.
+ if (VR_idx != Num_VR_Regs) {
+ unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ if (isVarArg) {
+ while ((ArgOffset % 16) != 0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != Num_GPR_Regs)
+ GPR_idx++;
+ }
+ ArgOffset += 16;
+ GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
+ }
+ ++VR_idx;
+ } else {
+ // Vectors are aligned.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ CurArgOffset = ArgOffset;
+ ArgOffset += 16;
+ needsLoad = true;
+ }
+ break;
+ }
+
+ // We need to load the argument to a virtual register if we determined
+ // above that we ran out of physical registers of the appropriate type.
+ if (needsLoad) {
+ int FI = MFI->CreateFixedObject(ObjSize,
+ CurArgOffset + (ArgSize - ObjSize),
+ isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, false, 0);
+ }
+
+ InVals.push_back(ArgVal);
+ }
+
+ // Set the size that is at least reserved in caller of this function. Tail
+ // call optimized functions' reserved stack space needs to be aligned so that
+ // taking the difference between two stack areas will result in an aligned
+ // stack.
+ setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true);
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ int Depth = ArgOffset;
+
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(PtrByteSize, Depth, true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by dereferencing
+ // the result of va_next.
+ for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by the pointer size for the next argument to store.
+ SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl,
+ MVT::Other, &MemOps[0], MemOps.size());
+
+ return Chain;
+}
+
 SDValue
 PPCTargetLowering::LowerFormalArguments_Darwin(
 SDValue Chain,
@@ -1987,10 +2381,12 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
 default: llvm_unreachable("Unhandled argument type!");
 case MVT::i32:
 case MVT::f32:
- VecArgOffset += isPPC64 ? 8 : 4;
+ VecArgOffset += 4;
 break;
 case MVT::i64: // PPC64
 case MVT::f64:
+ // FIXME: We are guaranteed to be !isPPC64 at this point.
+ // Does MVT::i64 apply?
 VecArgOffset += 8;
 break;
 case MVT::v4f32:
@@ -2013,7 +2409,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
 SmallVector<SDValue, 8> MemOps;
 unsigned nAltivecParamsAtEnd = 0;
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
 SDValue ArgVal;
 bool needsLoad = false;
 EVT ObjectVT = Ins[ArgNo].VT;
@@ -2061,10 +2458,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
 else
 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
 SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(),
- ObjSize==1 ? MVT::i8 : MVT::i16,
- false, false, 0);
+ MachinePointerInfo(FuncArg,
+ CurArgOffset),
+ ObjType, false, false, 0);
 MemOps.push_back(Store);
 ++GPR_idx;
 }
@@ -2075,8 +2473,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
 }
 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
 // Store whatever pieces of the object are in registers
- // to memory. ArgVal will be address of the beginning of
- // the object.
+ // to memory. ArgOffset will be the address of the beginning
+ // of the object.
 if (GPR_idx != Num_GPR_Regs) {
 unsigned VReg;
 if (isPPC64)
@@ -2087,7 +2485,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(),
+ MachinePointerInfo(FuncArg, ArgOffset),
 false, false, 0);
 MemOps.push_back(Store);
 ++GPR_idx;
@@ -2122,18 +2520,10 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
- if (ObjectVT == MVT::i32) {
+ if (ObjectVT == MVT::i32)
 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
 // value to MVT::i64 and then truncate to the correct register size.
- if (Flags.isSExt()) - ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, - DAG.getValueType(ObjectVT)); - else if (Flags.isZExt()) - ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, - DAG.getValueType(ObjectVT)); - - ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); - } + ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); ++GPR_idx; } else { @@ -2220,23 +2610,10 @@ PPCTargetLowering::LowerFormalArguments_Darwin( } // Set the size that is at least reserved in caller of this function. Tail - // call optimized function's reserved stack space needs to be aligned so that + // call optimized functions' reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. - PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); - // Add the Altivec parameters at the end, if needed. - if (nAltivecParamsAtEnd) { - MinReservedArea = ((MinReservedArea+15)/16)*16; - MinReservedArea += 16*nAltivecParamsAtEnd; - } - MinReservedArea = - std::max(MinReservedArea, - PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); - unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()-> - getStackAlignment(); - unsigned AlignMask = TargetAlign-1; - MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; - FI->setMinReservedArea(MinReservedArea); + setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64); // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. @@ -2276,8 +2653,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( return Chain; } -/// CalculateParameterAndLinkageAreaSize - Get the size of the paramter plus -/// linkage area for the Darwin ABI. +/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus +/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI. static unsigned CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, bool isPPC64, @@ -2408,7 +2785,7 @@ static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { int Addr = C->getZExtValue(); if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. - (Addr << 6 >> 6) != Addr) + SignExtend32<26>(Addr) != Addr) return 0; // Top 6 bits have to be sext of immediate. return DAG.getConstant((int)C->getZExtValue() >> 2, @@ -2686,7 +3063,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // Thus for a call through a function pointer, the following actions need // to be performed: // 1. Save the TOC of the caller in the TOC save area of its stack - // frame (this is done in LowerCall_Darwin()). + // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). // 2. Load the address of the function entry point from the function // descriptor. // 3. Load the TOC of the callee from the function descriptor into r2. @@ -2776,6 +3153,15 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, return CallOpc; } +static +bool isLocalCall(const SDValue &Callee) +{ + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) + return !G->getGlobal()->isDeclaration() && + !G->getGlobal()->isWeakForLinker(); + return false; +} + SDValue PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, @@ -2791,12 +3177,32 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Copy all of the result registers out of their specified physreg. 
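The isBLACompatibleAddress hunk above replaces (Addr << 6 >> 6) != Addr with SignExtend32<26>(Addr) != Addr: left-shifting a negative signed value is undefined behavior in C++, so the helper does the shifts on the unsigned type first. A sketch of the fixed check, assuming the usual definition of SignExtend32 from llvm/Support/MathExtras.h:

#include <cassert>
#include <cstdint>

// Sign-extend the bottom B bits of X to a full int32_t; the left shift
// happens on the unsigned type, sidestepping signed-overflow UB.
template <unsigned B> int32_t SignExtend32(uint32_t X) {
  return int32_t(X << (32 - B)) >> (32 - B);
}

// A BLA target is encodable iff the address survives 26-bit sign
// extension (its low two bits being clear is checked separately above).
bool fitsIn26Bits(int32_t Addr) {
  return SignExtend32<26>(Addr) == Addr;
}

int main() {
  assert(fitsIn26Bits(0x01FFFFFC));   // largest encodable positive address
  assert(fitsIn26Bits(-0x02000000));  // most negative encodable address
  assert(!fitsIn26Bits(0x02000000));  // one past the positive range
  return 0;
}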
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; - EVT VT = VA.getValVT(); assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyFromReg(Chain, dl, - VA.getLocReg(), VT, InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - InFlag = Chain.getValue(2); + + SDValue Val = DAG.getCopyFromReg(Chain, dl, + VA.getLocReg(), VA.getLocVT(), InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::AExt: + Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); + break; + case CCValAssign::ZExt: + Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, + DAG.getValueType(VA.getValVT())); + Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); + break; + case CCValAssign::SExt: + Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, + DAG.getValueType(VA.getValVT())); + Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); + break; + } + + InVals.push_back(Val); } return Chain; @@ -2819,6 +3225,10 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, isTailCall, RegsToPass, Ops, NodeTys, PPCSubTarget); + // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls + if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) + Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); + // When performing tail call optimization the callee pops its arguments off // the stack. Account for this here so these bytes can be pushed back on in // PPCRegisterInfo::eliminateCallFramePseudoInstr. @@ -2880,8 +3290,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // from allocating it), resulting in an additional register being // allocated and an unnecessary move instruction being generated. needsTOCRestore = true; - } else if (CallOpc == PPCISD::CALL_SVR4) { - // Otherwise insert NOP. + } else if ((CallOpc == PPCISD::CALL_SVR4) && !isLocalCall(Callee)) { + // Otherwise insert NOP for non-local calls. 
CallOpc = PPCISD::CALL_NOP_SVR4; } } @@ -2923,10 +3333,16 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG); - if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) - return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg, - isTailCall, Outs, OutVals, Ins, - dl, DAG, InVals); + if (PPCSubTarget.isSVR4ABI()) { + if (PPCSubTarget.isPPC64()) + return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, + isTailCall, Outs, OutVals, Ins, + dl, DAG, InVals); + else + return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, + isTailCall, Outs, OutVals, Ins, + dl, DAG, InVals); + } return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, isTailCall, Outs, OutVals, Ins, @@ -2934,15 +3350,15 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } SDValue -PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { - // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description +PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description // of the 32-bit SVR4 ABI stack frame layout. assert((CallConv == CallingConv::C || @@ -3116,12 +3532,406 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Set CR6 to true if this is a vararg call with floating args passed in + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into the appropriate regs. + SDValue InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + // Set CR bit 6 to true if this is a vararg call with floating args passed in // registers. if (isVarArg) { - SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET, - dl, MVT::i32), 0); - RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR)); + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Ops[] = { Chain, InFlag }; + + Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, + dl, VTs, Ops, InFlag.getNode() ? 2 : 1); + + InFlag = Chain.getValue(1); + } + + if (isTailCall) + PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, + false, TailCallArguments); + + return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, + RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, + Ins, InVals); +} + +// Copy an argument into memory, being careful to do this outside the +// call sequence for the call to which the argument belongs. 
+SDValue
+PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
+ SDValue CallSeqStart,
+ ISD::ArgFlagsTy Flags,
+ SelectionDAG &DAG,
+ DebugLoc dl) const {
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+ // The MEMCPY must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ return NewCallSeqStart;
+}
+
+SDValue
+PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ unsigned NumOps = Outs.size();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ unsigned PtrByteSize = 8;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Mark this function as potentially containing a tail call. As a
+ // consequence, the frame pointer will be used for dynamic stack allocation
+ // and for restoring the caller's stack pointer in this function's epilogue.
+ // This is done because a tail-called function might overwrite the value
+ // in this function's (MF) stack pointer stack slot 0(SP).
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ unsigned nAltivecParamsAtEnd = 0;
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area and the parameter passing area. We start with at least 48 bytes,
+ // which is reserved space for [SP][CR][LR][3 x unused].
+ // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result
+ // of this call.
+ unsigned NumBytes =
+ CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv,
+ Outs, OutVals, nAltivecParamsAtEnd);
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // To protect arguments on the stack from being clobbered in a tail call,
+ // force all the loads to happen before doing any other lowering.
+ if (isTailCall)
+ Chain = DAG.getStackArgumentTokenFactor(Chain);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass.
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so they can be moved somewhere
+ // else later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
+ dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory. Also, if this is a vararg function, floating point operations
+ // must be stored to our stack, and loaded into integer regs as well, if
+ // any integer regs are available for argument passing.
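The 48 bytes mentioned above form the fixed 64-bit SVR4 linkage area. The slot names below are taken from the ABI document rather than from this patch; note the TOC save doubleword at offset 40, which is exactly where LowerCall_64SVR4 stores r2 for indirect calls further down. A standalone sketch:

#include <cassert>

// 64-bit SVR4 linkage area: six doublewords at the low end of every
// frame, addressed relative to the stack pointer.
enum LinkageAreaOffset {
  BackChainOffset = 0,  // [SP] caller's back chain
  CRSaveOffset    = 8,  // [CR] condition register save
  LRSaveOffset    = 16, // [LR] link register save
  CompilerOffset  = 24, // reserved for compiler use
  LinkerOffset    = 32, // reserved for linker use
  TOCSaveOffset   = 40  // TOC (r2) save slot for indirect calls
};

const unsigned LinkageSize = 48; // six doublewords

int main() {
  // The first stack-passed argument lands right after the linkage area,
  // matching ArgOffset = getLinkageSize(true, true) in the code below.
  assert(TOCSaveOffset + 8 == LinkageSize);
  return 0;
}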
+ unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true); + unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; + + static const uint16_t GPR[] = { + PPC::X3, PPC::X4, PPC::X5, PPC::X6, + PPC::X7, PPC::X8, PPC::X9, PPC::X10, + }; + static const uint16_t *FPR = GetFPR(); + + static const uint16_t VR[] = { + PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, + PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 + }; + const unsigned NumGPRs = array_lengthof(GPR); + const unsigned NumFPRs = 13; + const unsigned NumVRs = array_lengthof(VR); + + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + SmallVector<TailCallArgumentInfo, 8> TailCallArguments; + + SmallVector<SDValue, 8> MemOpChains; + for (unsigned i = 0; i != NumOps; ++i) { + SDValue Arg = OutVals[i]; + ISD::ArgFlagsTy Flags = Outs[i].Flags; + + // PtrOff will be used to store the current argument to the stack if a + // register cannot be found for it. + SDValue PtrOff; + + PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); + + PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); + + // Promote integers to 64-bit values. + if (Arg.getValueType() == MVT::i32) { + // FIXME: Should this use ANY_EXTEND if neither sext nor zext? + unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); + } + + // FIXME memcpy is used way more than necessary. Correctness first. + // Note: "by value" is code for passing a structure by value, not + // basic types. + if (Flags.isByVal()) { + // Note: Size includes alignment padding, so + // struct x { short a; char b; } + // will have Size = 4. With #pragma pack(1), it will have Size = 3. + // These are the proper values we need for right-justifying the + // aggregate in a parameter register. + unsigned Size = Flags.getByValSize(); + + // An empty aggregate parameter takes up no storage and no + // registers. + if (Size == 0) + continue; + + // All aggregates smaller than 8 bytes must be passed right-justified. + if (Size==1 || Size==2 || Size==4) { + EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); + if (GPR_idx != NumGPRs) { + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, + MachinePointerInfo(), VT, + false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); + + ArgOffset += PtrByteSize; + continue; + } + } + + if (GPR_idx == NumGPRs && Size < 8) { + SDValue Const = DAG.getConstant(PtrByteSize - Size, + PtrOff.getValueType()); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); + Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, + CallSeqStart, + Flags, DAG, dl); + ArgOffset += PtrByteSize; + continue; + } + // Copy entire object into memory. There are cases where gcc-generated + // code assumes it is there, even if it could be put entirely into + // registers. (This is not what the doc says.) + + // FIXME: The above statement is likely due to a misunderstanding of the + // documents. All arguments must be copied into the parameter area BY + // THE CALLEE in the event that the callee takes the address of any + // formal argument. That has not yet been implemented. However, it is + // reasonable to use the stack area as a staging area for the register + // load. + + // Skip this for small aggregates, as we will use the same slot for a + // right-justified copy, below. 
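The byval sizes quoted in the comment above (struct x { short a; char b; } having Size = 4, or 3 under #pragma pack(1)) can be checked directly. A small sketch, assuming a typical ABI where short is two bytes with two-byte alignment; such sizes are implementation-defined in general:

#include <cstdio>

struct Padded { short a; char b; };   // one byte of tail padding: sizeof == 4

#pragma pack(push, 1)
struct Packed { short a; char b; };   // packing removes it: sizeof == 3
#pragma pack(pop)

int main() {
  // On typical ABIs this prints 4 and 3, the two values in the comment.
  std::printf("sizeof(Padded) = %zu\n", sizeof(Padded));
  std::printf("sizeof(Packed) = %zu\n", sizeof(Packed));
  return 0;
}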
+ if (Size >= 8) + Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, + CallSeqStart, + Flags, DAG, dl); + + // When a register is available, pass a small aggregate right-justified. + if (Size < 8 && GPR_idx != NumGPRs) { + // The easiest way to get this right-justified in a register + // is to copy the structure into the rightmost portion of a + // local variable slot, then load the whole slot into the + // register. + // FIXME: The memcpy seems to produce pretty awful code for + // small aggregates, particularly for packed ones. + // FIXME: It would be preferable to use the slot in the + // parameter save area instead of a new local variable. + SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType()); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); + Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, + CallSeqStart, + Flags, DAG, dl); + + // Load the slot into the register. + SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff, + MachinePointerInfo(), + false, false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); + + // Done with this argument. + ArgOffset += PtrByteSize; + continue; + } + + // For aggregates larger than PtrByteSize, copy the pieces of the + // object that fit into registers from the parameter save area. + for (unsigned j=0; j<Size; j+=PtrByteSize) { + SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); + SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); + if (GPR_idx != NumGPRs) { + SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, + MachinePointerInfo(), + false, false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); + ArgOffset += PtrByteSize; + } else { + ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; + break; + } + } + continue; + } + + switch (Arg.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected ValueType for argument!"); + case MVT::i32: + case MVT::i64: + if (GPR_idx != NumGPRs) { + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); + } else { + LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, + true, isTailCall, false, MemOpChains, + TailCallArguments, dl); + } + ArgOffset += PtrByteSize; + break; + case MVT::f32: + case MVT::f64: + if (FPR_idx != NumFPRs) { + RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); + + if (isVarArg) { + // A single float or an aggregate containing only a single float + // must be passed right-justified in the stack doubleword, and + // in the GPR, if one is available. + SDValue StoreOff; + if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) { + SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); + StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); + } else + StoreOff = PtrOff; + + SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff, + MachinePointerInfo(), false, false, 0); + MemOpChains.push_back(Store); + + // Float varargs are always shadowed in available integer registers + if (GPR_idx != NumGPRs) { + SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, + MachinePointerInfo(), false, false, + false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); + } + } else if (GPR_idx != NumGPRs) + // If we have any FPRs remaining, we may also have GPRs remaining. 
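The right-justification trick above (memcpy the aggregate into the tail of a doubleword slot, then load the whole slot at once) can be mimicked in plain C++. A standalone sketch; on a big-endian target such as PPC64 this is what places a small aggregate in the low-order bytes of the register, as the ABI requires:

#include <cassert>
#include <cstdint>
#include <cstring>

// Copy a Size-byte aggregate into the rightmost bytes of an 8-byte slot,
// then read the whole slot back as one doubleword -- the same shape as
// the memcpy-then-64-bit-load emitted above.
uint64_t loadRightJustified(const void *Aggregate, unsigned Size) {
  unsigned char Slot[8] = {0};
  std::memcpy(Slot + (8 - Size), Aggregate, Size);
  uint64_t Reg;
  std::memcpy(&Reg, Slot, 8);
  return Reg;
}

int main() {
  unsigned char ThreeBytes[3] = {0xAA, 0xBB, 0xCC};
  uint64_t Reg = loadRightJustified(ThreeBytes, 3);
  // Independent of host endianness, the slot image keeps the aggregate in
  // its last three bytes; a big-endian load maps those to the register's
  // low-order bits.
  unsigned char Image[8];
  std::memcpy(Image, &Reg, 8);
  assert(Image[5] == 0xAA && Image[6] == 0xBB && Image[7] == 0xCC);
  return 0;
}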
+ ++GPR_idx; + } else { + // Single-precision floating-point values are mapped to the + // second (rightmost) word of the stack doubleword. + if (Arg.getValueType() == MVT::f32) { + SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); + PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); + } + + LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, + true, isTailCall, false, MemOpChains, + TailCallArguments, dl); + } + ArgOffset += 8; + break; + case MVT::v4f32: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + if (isVarArg) { + // These go aligned on the stack, or in the corresponding R registers + // when within range. The Darwin PPC ABI doc claims they also go in + // V registers; in fact gcc does this only for arguments that are + // prototyped, not for those that match the ... We do it for all + // arguments, seems to work. + while (ArgOffset % 16 !=0) { + ArgOffset += PtrByteSize; + if (GPR_idx != NumGPRs) + GPR_idx++; + } + // We could elide this store in the case where the object fits + // entirely in R registers. Maybe later. + PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, + DAG.getConstant(ArgOffset, PtrVT)); + SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo(), false, false, 0); + MemOpChains.push_back(Store); + if (VR_idx != NumVRs) { + SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, + MachinePointerInfo(), + false, false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); + } + ArgOffset += 16; + for (unsigned i=0; i<16; i+=PtrByteSize) { + if (GPR_idx == NumGPRs) + break; + SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, + DAG.getConstant(i, PtrVT)); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), + false, false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); + } + break; + } + + // Non-varargs Altivec params generally go in registers, but have + // stack space allocated at the end. + if (VR_idx != NumVRs) { + // Doesn't have GPR space allocated. + RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); + } else { + LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, + true, isTailCall, true, MemOpChains, + TailCallArguments, dl); + ArgOffset += 16; + } + break; + } + } + + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Check if this is an indirect call (MTCTR/BCTRL). + // See PrepareCall() for more information about calls through function + // pointers in the 64-bit SVR4 ABI. + if (!isTailCall && + !dyn_cast<GlobalAddressSDNode>(Callee) && + !dyn_cast<ExternalSymbolSDNode>(Callee) && + !isBLACompatibleAddress(Callee, DAG)) { + // Load r2 into a virtual register and store it to the TOC save area. + SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); + // TOC save area offset. + SDValue PtrOff = DAG.getIntPtrConstant(40); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); + Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), + false, false, 0); + // R12 must contain the address of an indirect callee. This does not + // mean the MTCTR instruction must use R12; it's easier to model this + // as an extra parameter, so do that. 
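The indirect-call code above carries out step 1 of the PrepareCall comment (save the caller's TOC at offset 40); steps 2 and 3 read the callee's entry point and TOC out of its function descriptor at run time. A sketch of the descriptor layout as given by the 64-bit SVR4 ABI (an assumption of this note, not spelled out in the patch):

#include <cstdint>

// What a 64-bit SVR4 function pointer actually points at: not code, but
// a three-doubleword descriptor.
struct FunctionDescriptor {
  uint64_t EntryPoint;  // address of the first instruction
  uint64_t TOCBase;     // value the callee expects in r2
  uint64_t Environment; // environment pointer, unused by C code
};

int main() {
  static_assert(sizeof(FunctionDescriptor) == 24, "three doublewords");
  return 0;
}

This indirection is why R12 can simply receive the descriptor address as an extra parameter here; the call sequence built in PrepareCall performs the actual loads before the MTCTR/BCTRL.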
+ RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); } // Build a sequence of copy-to-reg nodes chained together with token chain @@ -3134,8 +3944,8 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, } if (isTailCall) - PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, - false, TailCallArguments); + PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, + FPOp, true, TailCallArguments); return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, @@ -3152,7 +3962,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - unsigned NumOps = Outs.size(); + unsigned NumOps = Outs.size(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; @@ -3259,11 +4069,13 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, } // FIXME memcpy is used way more than necessary. Correctness first. + // Note: "by value" is code for passing a structure by value, not + // basic types. if (Flags.isByVal()) { unsigned Size = Flags.getByValSize(); + // Very small objects are passed right-justified. Everything else is + // passed left-justified. if (Size==1 || Size==2) { - // Very small objects are passed right-justified. - // Everything else is passed left-justified. EVT VT = (Size==1) ? MVT::i8 : MVT::i16; if (GPR_idx != NumGPRs) { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, @@ -3274,17 +4086,12 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, ArgOffset += PtrByteSize; } else { - SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType()); + SDValue Const = DAG.getConstant(PtrByteSize - Size, + PtrOff.getValueType()); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); - SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr, - CallSeqStart.getNode()->getOperand(0), - Flags, DAG, dl); - // This must go outside the CALLSEQ_START..END. - SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, - CallSeqStart.getNode()->getOperand(1)); - DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), - NewCallSeqStart.getNode()); - Chain = CallSeqStart = NewCallSeqStart; + Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, + CallSeqStart, + Flags, DAG, dl); ArgOffset += PtrByteSize; } continue; @@ -3292,15 +4099,13 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // Copy entire object into memory. There are cases where gcc-generated // code assumes it is there, even if it could be put entirely into // registers. (This is not what the doc says.) - SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, - CallSeqStart.getNode()->getOperand(0), - Flags, DAG, dl); - // This must go outside the CALLSEQ_START..END. - SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, - CallSeqStart.getNode()->getOperand(1)); - DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode()); - Chain = CallSeqStart = NewCallSeqStart; - // And copy the pieces of it that fit into registers. + Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, + CallSeqStart, + Flags, DAG, dl); + + // For small aggregates (Darwin only) and aggregates >= PtrByteSize, + // copy the pieces of the object that fit into registers from the + // parameter save area. 
for (unsigned j=0; j<Size; j+=PtrByteSize) { SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); @@ -3369,11 +4174,10 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, !isPPC64) // PPC64 has 64-bit GPR's obviously :) ++GPR_idx; } - } else { + } else LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, isPPC64, isTailCall, false, MemOpChains, TailCallArguments, dl); - } if (isPPC64) ArgOffset += 8; else @@ -3468,22 +4272,6 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Check if this is an indirect call (MTCTR/BCTRL). - // See PrepareCall() for more information about calls through function - // pointers in the 64-bit SVR4 ABI. - if (!isTailCall && isPPC64 && PPCSubTarget.isSVR4ABI() && - !dyn_cast<GlobalAddressSDNode>(Callee) && - !dyn_cast<ExternalSymbolSDNode>(Callee) && - !isBLACompatibleAddress(Callee, DAG)) { - // Load r2 into a virtual register and store it to the TOC save area. - SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); - // TOC save area offset. - SDValue PtrOff = DAG.getIntPtrConstant(40); - SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); - Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), - false, false, 0); - } - // On Darwin, R12 must contain the address of an indirect callee. This does // not mean the MTCTR instruction must use R12; it's easier to model this as // an extra parameter, so do that. @@ -3548,8 +4336,24 @@ PPCTargetLowering::LowerReturn(SDValue Chain, for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - OutVals[i], Flag); + + SDValue Arg = OutVals[i]; + + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); + break; + } + + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); Flag = Chain.getValue(1); } @@ -3781,7 +4585,52 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, return SDValue(); if (Op.getOperand(0).getValueType() == MVT::i64) { - SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0)); + SDValue SINT = Op.getOperand(0); + // When converting to single-precision, we actually need to convert + // to double-precision first and then round to single-precision. + // To avoid double-rounding effects during that operation, we have + // to prepare the input operand. Bits that might be truncated when + // converting to double-precision are replaced by a bit that won't + // be lost at this stage, but is below the single-precision rounding + // position. + // + // However, if -enable-unsafe-fp-math is in effect, accept double + // rounding to avoid the extra overhead. + if (Op.getValueType() == MVT::f32 && + !DAG.getTarget().Options.UnsafeFPMath) { + + // Twiddle input to make sure the low 11 bits are zero. (If this + // is the case, we are guaranteed the value will fit into the 53 bit + // mantissa of an IEEE double-precision value without rounding.) 
+ // If any of those low 11 bits were not zero originally, make sure + // bit 12 (value 2048) is set instead, so that the final rounding + // to single-precision gets the correct result. + SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, + SINT, DAG.getConstant(2047, MVT::i64)); + Round = DAG.getNode(ISD::ADD, dl, MVT::i64, + Round, DAG.getConstant(2047, MVT::i64)); + Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); + Round = DAG.getNode(ISD::AND, dl, MVT::i64, + Round, DAG.getConstant(-2048, MVT::i64)); + + // However, we cannot use that value unconditionally: if the magnitude + // of the input value is small, the bit-twiddling we did above might + // end up visibly changing the output. Fortunately, in that case, we + // don't need to twiddle bits since the original input will convert + // exactly to double-precision floating-point already. Therefore, + // construct a conditional to use the original value if the top 11 + // bits are all sign-bit copies, and use the rounded value computed + // above otherwise. + SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, + SINT, DAG.getConstant(53, MVT::i32)); + Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, + Cond, DAG.getConstant(1, MVT::i64)); + Cond = DAG.getSetCC(dl, MVT::i32, + Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT); + + SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); + } + SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits); if (Op.getValueType() == MVT::f32) FP = DAG.getNode(ISD::FP_ROUND, dl, @@ -4126,7 +4975,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, unsigned TypeShiftAmt = i & (SplatBitSize-1); // vsplti + shl self. - if (SextVal == (i << (int)TypeShiftAmt)) { + if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, @@ -4171,17 +5020,17 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } // t = vsplti c, result = vsldoi t, t, 1 - if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) { + if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 2 - if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) { + if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 3 - if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) { + if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); } @@ -5630,6 +6479,14 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const { case 'v': case 'y': return C_RegisterClass; + case 'Z': + // FIXME: While Z does indicate a memory constraint, it specifically + // indicates an r+r address (used in conjunction with the 'y' modifier + // in the replacement string). Currently, we're forcing the base + // register to be r0 in the asm printer (which is interpreted as zero) + // and forming the complete address in the second register. This is + // suboptimal. 
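The LowerSINT_TO_FP hunk above guards against a genuine double-rounding hazard: converting i64 to f64 and then to f32 rounds twice and can disagree with a single correctly rounded i64-to-f32 conversion, so the AND/ADD/OR/AND sequence first folds the low 11 bits into one sticky bit that survives the trip through double. A standalone sketch of the same arithmetic, assuming IEEE-754 float/double on the host (illustration only):

#include <cassert>
#include <cstdint>

// Mirror of the DAG sequence: clear the low 11 bits of V and fold them
// into bit 11, so the value converts to double exactly while remembering
// that something was set below the final single-precision rounding point.
int64_t stickyRound(int64_t V) {
  int64_t Round = (V & 2047) + 2047; // carries into bit 11 iff low bits != 0
  Round |= V;
  Round &= ~int64_t(2047);
  // Values whose top 11 bits are sign-bit copies convert to double
  // exactly; use them unchanged (the SRA/ADD/SETUGT test above).
  bool Exact = uint64_t((V >> 53) + 1) <= 1;
  return Exact ? V : Round;
}

int main() {
  // Chosen to sit just above the halfway point between two floats near
  // 2^60: the detour through double rounds it down to exactly halfway,
  // and the second rounding then goes the wrong way.
  int64_t V = (int64_t(1) << 60) + (int64_t(1) << 36) + 1;
  float Direct    = (float)V;                      // one rounding
  float ViaDouble = (float)(double)V;              // two roundings: wrong
  float ViaSticky = (float)(double)stickyRound(V); // sticky bit: right
  assert(Direct != ViaDouble);
  assert(Direct == ViaSticky);
  return 0;
}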
+ return C_Memory; } } return TargetLowering::getConstraintType(Constraint); @@ -5672,6 +6529,9 @@ PPCTargetLowering::getSingleConstraintMatchWeight( case 'y': weight = CW_Register; break; + case 'Z': + weight = CW_Memory; + break; } return weight; } @@ -5688,9 +6548,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return std::make_pair(0U, &PPC::G8RCRegClass); return std::make_pair(0U, &PPC::GPRCRegClass); case 'f': - if (VT == MVT::f32) + if (VT == MVT::f32 || VT == MVT::i32) return std::make_pair(0U, &PPC::F4RCRegClass); - if (VT == MVT::f64) + if (VT == MVT::f64 || VT == MVT::i64) return std::make_pair(0U, &PPC::F8RCRegClass); break; case 'v': @@ -5870,7 +6730,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) && MFI->getStackSize() && - !MF.getFunction()->hasFnAttr(Attribute::Naked); + !MF.getFunction()->getFnAttributes(). + hasAttribute(Attributes::Naked); unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) : (is31 ? PPC::R31 : PPC::R1); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h index b0a013b..b3c7f9c 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -174,6 +174,10 @@ namespace llvm { /// operand #3 optional in flag TC_RETURN, + /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls + CR6SET, + CR6UNSET, + /// STD_32 - This is the STD instruction for use with "32-bit" registers. STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, @@ -463,20 +467,41 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG) const; SDValue + extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG, + SDValue ArgVal, DebugLoc dl) const; + + void + setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG, + unsigned nAltivecParamsAtEnd, + unsigned MinReservedArea, bool isPPC64) const; + + SDValue LowerFormalArguments_Darwin(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue - LowerFormalArguments_SVR4(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; + LowerFormalArguments_64SVR4(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + SDValue + LowerFormalArguments_32SVR4(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + + SDValue + createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, + SDValue CallSeqStart, ISD::ArgFlagsTy Flags, + SelectionDAG &DAG, DebugLoc dl) const; SDValue - LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, + LowerCall_Darwin(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, @@ -484,13 +509,22 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue - LowerCall_SVR4(SDValue Chain, SDValue Callee, 
CallingConv::ID CallConv,
- bool isVarArg, bool isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ LowerCall_64SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv,
+ bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue
+ LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
 };
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 39778a5..9711452 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -29,6 +29,9 @@ def symbolLo64 : Operand<i64> {
 let PrintMethod = "printSymbolLo";
 let EncoderMethod = "getLO16Encoding";
}
+def tocentry : Operand<iPTR> {
+ let MIOperandInfo = (ops i32imm:$imm);
+}
//===----------------------------------------------------------------------===//
// 64-bit transformation functions.
@@ -60,7 +63,7 @@ def HI48_64 : SDNodeXForm<imm, [{
//
let Defs = [LR8] in
- def MovePCtoLR8 : Pseudo<(outs), (ins), "", []>,
+ def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>,
 PPC970_Unit_BRU;
// Darwin ABI Calls.
@@ -138,31 +141,31 @@ def : Pat<(PPCnop),
let usesCustomInserter = 1 in {
 let Defs = [CR0] in {
 def ATOMIC_LOAD_ADD_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_ADD_I64",
 [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>;
 def ATOMIC_LOAD_SUB_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_SUB_I64",
 [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>;
 def ATOMIC_LOAD_OR_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_OR_I64",
 [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>;
 def ATOMIC_LOAD_XOR_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_XOR_I64",
 [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>;
 def ATOMIC_LOAD_AND_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_AND_I64",
 [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>;
 def ATOMIC_LOAD_NAND_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_NAND_I64",
 [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>;
 def ATOMIC_CMP_SWAP_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "#ATOMIC_CMP_SWAP_I64",
 [(set G8RC:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>;
 def ATOMIC_SWAP_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "",
+ (outs 
G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "#ATOMIC_SWAP_I64", [(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>; } } @@ -231,10 +234,10 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { let Defs = [CTR8], Uses = [CTR8] in { - def BDZ8 : IForm_ext<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), - "bdz $dst", BrB, []>; - def BDNZ8 : IForm_ext<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), - "bdnz $dst", BrB, []>; + def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), + "bdz $dst">; + def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), + "bdnz $dst">; } } @@ -244,7 +247,7 @@ def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS), PPC970_MicroCode, PPC970_Unit_CRU; def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM), - "", SprMFCR>, + "#MFCR8pseud", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins), @@ -275,7 +278,7 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins), // the POWER3. let Defs = [X1], Uses = [X1] in -def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"", +def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"#DYNALLOC8", [(set G8RC:$result, (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>; @@ -296,12 +299,14 @@ def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins), let PPC970_Unit = 1 in { // FXU Operations. +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm), "li $rD, $imm", IntSimple, [(set G8RC:$rD, immSExt16:$imm)]>; def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm), "lis $rD, $imm", IntSimple, [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>; +} // Logical ops. def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), @@ -459,7 +464,7 @@ def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS), let Defs = [CARRY] in { def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH), - "sradi $rA, $rS, $SH", IntRotateD, + "sradi $rA, $rS, $SH", IntRotateDI, [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64; } def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS), @@ -482,23 +487,23 @@ def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), let isCommutable = 1 in { def RLDIMI : MDForm_1<30, 3, (outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB), - "rldimi $rA, $rS, $SH, $MB", IntRotateD, + "rldimi $rA, $rS, $SH, $MB", IntRotateDI, []>, isPPC64, RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">; } // Rotate instructions. 
def RLDCL : MDForm_1<30, 0, - (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MB), - "rldcl $rA, $rS, $rB, $MB", IntRotateD, + (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MBE), + "rldcl $rA, $rS, $rB, $MBE", IntRotateD, []>, isPPC64; def RLDICL : MDForm_1<30, 0, - (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MB), - "rldicl $rA, $rS, $SH, $MB", IntRotateD, + (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE), + "rldicl $rA, $rS, $SH, $MBE", IntRotateDI, []>, isPPC64; def RLDICR : MDForm_1<30, 1, - (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME), - "rldicr $rA, $rS, $SH, $ME", IntRotateD, + (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE), + "rldicr $rA, $rS, $SH, $MBE", IntRotateDI, []>, isPPC64; def RLWINM8 : MForm_2<21, @@ -506,7 +511,7 @@ def RLWINM8 : MForm_2<21, "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral, []>; -def ISEL8 : AForm_1<31, 15, +def ISEL8 : AForm_4<31, 15, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; @@ -541,19 +546,19 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), let mayLoad = 1 in def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp, ptr_rc:$rA), - "lhau $rD, $disp($rA)", LdStLoad, + "lhau $rD, $disp($rA)", LdStLHAU, []>, RegConstraint<"$rA = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhaux $rD, $addr", LdStLoad, + "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LWAUX : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), +def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lwaux $rD, $addr", LdStLoad, + "lwaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } @@ -584,31 +589,31 @@ def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src), // Update forms. 
let mayLoad = 1 in { def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStLoad, + "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStLoad, + "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStLoad, + "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lbzux $rD, $addr", LdStLoad, + "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX8 : XForm_1<31, 331, (outs G8RC:$rD, ptr_rc:$ea_result), +def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhzux $rD, $addr", LdStLoad, + "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lwzux $rD, $addr", LdStLoad, + "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; } @@ -621,18 +626,26 @@ def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src), "ld $rD, $src", LdStLD, [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64; def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), - "", + "#LDtoc", [(set G8RC:$rD, (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64; +def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), + "#LDtocJTI", + [(set G8RC:$rD, + (PPCtoc_entry tjumptable:$disp, G8RC:$reg))]>, isPPC64; +def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), + "#LDtocCPT", + [(set G8RC:$rD, + (PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64; let hasSideEffects = 1 in { -let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo. -def LDinto_toc: DSForm_1<58, 0, (outs), (ins G8RC:$reg), +let RST = 2, DS = 2 in +def LDinto_toc: DSForm_1a<58, 0, (outs), (ins G8RC:$reg), "ld 2, 8($reg)", LdStLD, [(PPCload_toc G8RC:$reg)]>, isPPC64; -let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo. 
-def LDtoc_restore : DSForm_1<58, 0, (outs), (ins), +let RST = 2, DS = 10, RA = 1 in +def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), "ld 2, 40(1)", LdStLD, [(PPCtoc_restore)]>, isPPC64; } @@ -642,13 +655,13 @@ def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), let mayLoad = 1 in def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr), - "ldu $rD, $addr", LdStLD, + "ldu $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, NoEncode<"$ea_result">; def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "ldux $rD, $addr", LdStLoad, + "ldux $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } @@ -693,16 +706,16 @@ def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst), let PPC970_Unit = 2 in { -def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS, +def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStore, + "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStore, + "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, @@ -710,7 +723,7 @@ def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStore, + "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, @@ -718,7 +731,7 @@ def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, s16immX4:$ptroff, ptr_rc:$ptrreg), - "stdu $rS, $ptroff($ptrreg)", LdStSTD, + "stdu $rS, $ptroff($ptrreg)", LdStSTDU, [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">, @@ -727,7 +740,7 @@ def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStore, + "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -736,7 +749,7 @@ def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res), def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStore, + "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -745,7 +758,7 @@ def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res), def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStore, + "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -754,7 +767,7 @@ def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res), def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stdux $rS, $ptroff, $ptrreg", 
LdStStore, + "stdux $rS, $ptroff, $ptrreg", LdStSTDU, [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index b0b8423..ba58c3e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -340,6 +340,28 @@ def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vctuxs $vD, $vB, $UIMM", VecFP, [(set VRRC:$vD, (int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>; + +// Defines with the UIM field set to 0 for floating-point +// to integer (fp_to_sint/fp_to_uint) conversions and integer +// to floating-point (sint_to_fp/uint_to_fp) conversions. +let VA = 0 in { +def VCFSX_0 : VXForm_1<842, (outs VRRC:$vD), (ins VRRC:$vB), + "vcfsx $vD, $vB, 0", VecFP, + [(set VRRC:$vD, + (int_ppc_altivec_vcfsx VRRC:$vB, 0))]>; +def VCTUXS_0 : VXForm_1<906, (outs VRRC:$vD), (ins VRRC:$vB), + "vctuxs $vD, $vB, 0", VecFP, + [(set VRRC:$vD, + (int_ppc_altivec_vctuxs VRRC:$vB, 0))]>; +def VCFUX_0 : VXForm_1<778, (outs VRRC:$vD), (ins VRRC:$vB), + "vcfux $vD, $vB, 0", VecFP, + [(set VRRC:$vD, + (int_ppc_altivec_vcfux VRRC:$vB, 0))]>; +def VCTSXS_0 : VXForm_1<970, (outs VRRC:$vD), (ins VRRC:$vB), + "vctsxs $vD, $vB, 0", VecFP, + [(set VRRC:$vD, + (int_ppc_altivec_vctsxs VRRC:$vB, 0))]>; +} def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>; def VLOGEFP : VX2_Int<458, "vlogefp", int_ppc_altivec_vlogefp>; @@ -689,3 +711,13 @@ def : Pat<(v8i16 (sra (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))), (v8i16 (VSRAH VRRC:$vA, VRRC:$vB))>; def : Pat<(v4i32 (sra (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))), (v4i32 (VSRAW VRRC:$vA, VRRC:$vB))>; + +// Float to integer and integer to float conversions +def : Pat<(v4i32 (fp_to_sint (v4f32 VRRC:$vA))), + (VCTSXS_0 VRRC:$vA)>; +def : Pat<(v4i32 (fp_to_uint (v4f32 VRRC:$vA))), + (VCTUXS_0 VRRC:$vA)>; +def : Pat<(v4f32 (sint_to_fp (v4i32 VRRC:$vA))), + (VCFSX_0 VRRC:$vA)>; +def : Pat<(v4f32 (uint_to_fp (v4i32 VRRC:$vA))), + (VCFUX_0 VRRC:$vA)>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td index a41a027..c3c171c 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -94,12 +94,6 @@ class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr, let Inst{31} = lk; } -class IForm_ext<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL, - string asmstr, InstrItinClass itin, list<dag> pattern> - : IForm<opcode, aa, lk, OOL, IOL, asmstr, itin, pattern> { - let LI{0-4} = bo; -} - // 1.7.2 B-Form class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr> : I<opcode, OOL, IOL, asmstr, BrB> { @@ -118,6 +112,13 @@ class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr> let Inst{31} = lk; } +class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL, + string asmstr> + : BForm<opcode, aa, lk, OOL, IOL, asmstr> { + let BIBO{4-0} = bo; + let BIBO{6-5} = 0; + let CR = 0; +} // 1.7.4 D-Form class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr, @@ -625,9 +626,9 @@ class XFXForm_5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : I<opcode, OOL, IOL, asmstr, itin> { bits<8> FXM; - bits<5> ST; + bits<5> rS; - let Inst{6-10} = ST; + let Inst{6-10} = rS; let Inst{11} = 0; let 
Inst{12-19} = FXM; let Inst{20} = 0; @@ -666,7 +667,7 @@ class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, string cstr, InstrItinClass itin, list<dag>pattern> : I<opcode, OOL, IOL, asmstr, itin> { bits<8> FM; - bits<5> RT; + bits<5> rT; bit RC = 0; // set by isDOT let Pattern = pattern; @@ -675,7 +676,7 @@ class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{6} = 0; let Inst{7-14} = FM; let Inst{15} = 0; - let Inst{16-20} = RT; + let Inst{16-20} = rT; let Inst{21-30} = xo; let Inst{31} = RC; } @@ -758,6 +759,26 @@ class AForm_3<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, let FRB = 0; } +class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RT; + bits<5> RA; + bits<5> RB; + bits<7> BIBO; // 2 bits of BI and 5 bits of BO (must be 12). + bits<3> CR; + + let Pattern = pattern; + + let Inst{6-10} = RT; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-23} = CR; + let Inst{24-25} = BIBO{6-5}; + let Inst{26-30} = xo; + let Inst{31} = 0; +} + // 1.7.13 M-Form class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 47f09dc..d9d6844 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -54,7 +54,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( const TargetMachine *TM, const ScheduleDAG *DAG) const { unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective(); - if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) { + if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 || + Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) { const InstrItineraryData *II = TM->getInstrItineraryData(); return new PPCScoreboardHazardRecognizer(II, DAG); } @@ -70,7 +71,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); // Most subtargets use a PPC970 recognizer. - if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2) { + if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 && + Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) { const TargetInstrInfo *TII = TM.getInstrInfo(); assert(TII && "No InstrInfo?"); @@ -568,12 +570,15 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, // STVX VAL, 0, R0 // // FIXME: We use R0 here, because it isn't available for RA. - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0), + bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); + unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; + unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), FrameIdx, 0, 0)); NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX)) .addReg(SrcReg, getKillRegState(isKill)) - .addReg(PPC::R0) - .addReg(PPC::R0)); + .addReg(GPR0) + .addReg(GPR0)); } else { llvm_unreachable("Unknown regclass!"); } @@ -705,10 +710,13 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, // Dest = LVX 0, R0 // // FIXME: We use R0 here, because it isn't available for RA. 
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0), + bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); + unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; + unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), FrameIdx, 0, 0)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(PPC::R0) - .addReg(PPC::R0)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0) + .addReg(GPR0)); } else { llvm_unreachable("Unknown regclass!"); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td index f57f0c9..6ee045a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -123,9 +123,11 @@ def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>; def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, + SDNPInGlue, SDNPOutGlue]>; def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, + SDNPInGlue, SDNPOutGlue]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone, @@ -153,6 +155,12 @@ def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx, def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, [SDNPHasChain, SDNPMayStore]>; +// Instructions to set/unset CR bit 6 for SVR4 vararg calls +def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + // Instructions to support atomic operations def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx, [SDNPHasChain, SDNPMayLoad]>; @@ -330,9 +338,6 @@ def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits. let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg); let EncoderMethod = "getMemRIXEncoding"; } -def tocentry : Operand<iPTR> { - let MIOperandInfo = (ops i32imm:$imm); -} // PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg // that doesn't matter. @@ -364,9 +369,9 @@ def IsBookE : Predicate<"PPCSubTarget.isBookE()">; let hasCtrlDep = 1 in { let Defs = [R1], Uses = [R1] in { -def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "", +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "#ADJCALLSTACKDOWN $amt", [(callseq_start timm:$amt)]>; -def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "", +def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "#ADJCALLSTACKUP $amt1 $amt2", [(callseq_end timm:$amt1, timm:$amt2)]>; } @@ -375,7 +380,7 @@ def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS), } let Defs = [R1], Uses = [R1] in -def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "", +def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "#DYNALLOC", [(set GPRC:$result, (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>; @@ -384,19 +389,19 @@ def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "", let usesCustomInserter = 1, // Expanded after instruction selection. 
PPC970_Single = 1 in { def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F, - i32imm:$BROPC), "", + i32imm:$BROPC), "#SELECT_CC_I4", []>; def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F, - i32imm:$BROPC), "", + i32imm:$BROPC), "#SELECT_CC_I8", []>; def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F, - i32imm:$BROPC), "", + i32imm:$BROPC), "#SELECT_CC_F4", []>; def SELECT_CC_F8 : Pseudo<(outs F8RC:$dst), (ins CRRC:$cond, F8RC:$T, F8RC:$F, - i32imm:$BROPC), "", + i32imm:$BROPC), "#SELECT_CC_F8", []>; def SELECT_CC_VRRC: Pseudo<(outs VRRC:$dst), (ins CRRC:$cond, VRRC:$T, VRRC:$F, - i32imm:$BROPC), "", + i32imm:$BROPC), "#SELECT_CC_VRRC", []>; } @@ -404,16 +409,16 @@ let usesCustomInserter = 1, // Expanded after instruction selection. // scavenge a register for it. let mayStore = 1 in def SPILL_CR : Pseudo<(outs), (ins CRRC:$cond, memri:$F), - "", []>; + "#SPILL_CR", []>; // RESTORE_CR - Indicate that we're restoring the CR register (previously // spilled), so we'll need to scavenge a register for it. let mayLoad = 1 in def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F), - "", []>; + "#RESTORE_CR", []>; let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { - let isReturn = 1, Uses = [LR, RM] in + let isCodeGenOnly = 1, isReturn = 1, Uses = [LR, RM] in def BLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$p), "b${p:cc}lr ${p:reg}", BrB, [(retflag)]>; @@ -422,7 +427,7 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { } let Defs = [LR] in - def MovePCtoLR : Pseudo<(outs), (ins), "", []>, + def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>, PPC970_Unit_BRU; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { @@ -434,16 +439,17 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { // BCC represents an arbitrary conditional branch on a predicate. // FIXME: should be able to write a pattern for PPCcondbranch, but can't use - // a two-value operand where a dag node expects two operands. :( - def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst), - "b${cond:cc} ${cond:reg}, $dst" - /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>; + // a two-value operand where a dag node expects two operands. 
:( + let isCodeGenOnly = 1 in + def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst), + "b${cond:cc} ${cond:reg}, $dst" + /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>; let Defs = [CTR], Uses = [CTR] in { - def BDZ : IForm_ext<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), - "bdz $dst", BrB, []>; - def BDNZ : IForm_ext<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), - "bdnz $dst", BrB, []>; + def BDZ : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), + "bdz $dst">; + def BDNZ : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), + "bdnz $dst">; } } @@ -559,81 +565,81 @@ def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)), let usesCustomInserter = 1 in { let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I8", [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_SUB_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I8", [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_AND_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I8", [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_OR_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I8", [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_XOR_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I8", [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_NAND_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I8", [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_ADD_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I16", [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_SUB_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I16", [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_AND_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I16", [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_OR_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I16", [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_XOR_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I16", [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_NAND_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I16", [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_ADD_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins
memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I32", [(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_SUB_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I32", [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_AND_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I32", [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_OR_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I32", [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_XOR_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I32", [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_NAND_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I32", [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_CMP_SWAP_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I8", [(set GPRC:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; def ATOMIC_CMP_SWAP_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new", [(set GPRC:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; def ATOMIC_CMP_SWAP_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new", [(set GPRC:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; def ATOMIC_SWAP_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_i8", [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>; def ATOMIC_SWAP_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I16", [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>; def ATOMIC_SWAP_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I32", [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>; } } @@ -673,7 +679,7 @@ def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src), [(set GPRC:$rD, (load iaddr:$src))]>; def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src), - "lfs $rD, $src", LdStLFDU, + "lfs $rD, $src", LdStLFD, [(set F4RC:$rD, (load iaddr:$src))]>; def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), "lfd $rD, $src", LdStLFD, @@ -683,32 +689,32 @@ def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), // Unindexed (r+i) Loads with Update (preinc). 
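The hunks around this point reclassify the update-form (pre-increment) loads and stores out of the generic LdStLoad/LdStStore itinerary classes into dedicated classes (LdStLoadUpd, LdStStoreUpd, LdStLHAU, LdStLFDU), so per-core itineraries can charge the extra effective-address write-back separately. As a minimal sketch of how a scheduler-side consumer sees these classes, assuming LLVM 3.2-era headers and a class index obtained from MCInstrDesc::getSchedClass() (loadResultCycle is an illustrative name, not part of the patch):

  #include "llvm/MC/MCInstrItineraries.h"
  using namespace llvm;

  static unsigned loadResultCycle(const InstrItineraryData &Itins,
                                  unsigned SchedClass) {
    // Operand 0 is the loaded value; getOperandCycle returns -1 when the
    // itinerary carries no operand-cycle information for this class.
    int Cycle = Itins.getOperandCycle(SchedClass, 0);
    return Cycle < 0 ? 1 : unsigned(Cycle);
  }

On the e500mc model added later in this patch, LdStLoad and LdStLoadUpd share a def cycle of 6, but the Upd class additionally reserves an SFX unit and is marked as two micro-ops, which is exactly what the separate class makes expressible.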
let mayLoad = 1 in { def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStLoad, + "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhau $rD, $addr", LdStLoad, + "lhau $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStLoad, + "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStLoad, + "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lfs $rD, $addr", LdStLFDU, + "lfsu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lfd $rD, $addr", LdStLFD, + "lfdu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; @@ -716,37 +722,37 @@ def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), // Indexed (r+r) Loads with Update (preinc). def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lbzux $rD, $addr", LdStLoad, + "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhaux $rD, $addr", LdStLoad, + "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX : XForm_1<31, 331, (outs GPRC:$rD, ptr_rc:$ea_result), +def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhzux $rD, $addr", LdStLoad, + "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lwzux $rD, $addr", LdStLoad, + "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lfsux $rD, $addr", LdStLoad, + "lfsux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lfdux $rD, $addr", LdStLoad, + "lfdux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; } @@ -778,10 +784,10 @@ def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src), [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>; def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src), - "lfsx $frD, $src", LdStLFDU, + "lfsx $frD, $src", LdStLFD, [(set F4RC:$frD, (load xaddr:$src))]>; def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), - "lfdx $frD, $src", LdStLFDU, + "lfdx $frD, $src", LdStLFD, [(set F8RC:$frD, (load xaddr:$src))]>; } @@ -801,10 +807,10 @@ def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src), "stw $rS, $src", LdStStore, [(store GPRC:$rS, iaddr:$src)]>; def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst), - "stfs $rS, $dst", LdStUX, + "stfs $rS, $dst", 
LdStSTFD, [(store F4RC:$rS, iaddr:$dst)]>; def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), - "stfd $rS, $dst", LdStUX, + "stfd $rS, $dst", LdStSTFD, [(store F8RC:$rS, iaddr:$dst)]>; } @@ -812,33 +818,33 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), let PPC970_Unit = 2 in { def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStore, + "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStore, + "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStore, + "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfsu $rS, $ptroff($ptrreg)", LdStStore, + "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfdu $rS, $ptroff($ptrreg)", LdStStore, + "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; @@ -863,7 +869,7 @@ def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res), (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStore, + "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -872,7 +878,7 @@ def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res), def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res), (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStore, + "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -881,7 +887,7 @@ def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res), def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res), (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStore, + "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, @@ -889,7 +895,7 @@ def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res), def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res), (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfsux $rS, $ptroff, $ptrreg", LdStStore, + "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, @@ -897,7 +903,7 @@ def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res), def STFDUX : XForm_8<31, 759, (outs 
ptr_rc:$ea_res), (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfdux $rS, $ptroff, $ptrreg", LdStStore, + "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, @@ -913,14 +919,14 @@ def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst), PPC970_DGroup_Cracked; def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst), - "stfiwx $frS, $dst", LdStUX, + "stfiwx $frS, $dst", LdStSTFD, [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>; def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst), - "stfsx $frS, $dst", LdStUX, + "stfsx $frS, $dst", LdStSTFD, [(store F4RC:$frS, xaddr:$dst)]>; def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst), - "stfdx $frS, $dst", LdStUX, + "stfdx $frS, $dst", LdStSTFD, [(store F8RC:$frS, xaddr:$dst)]>; } @@ -964,7 +970,7 @@ def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>; } -let isReMaterializable = 1 in { +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm), "li $rD, $imm", IntSimple, [(set GPRC:$rD, immSExt16:$imm)]>; @@ -1143,6 +1149,16 @@ def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins), "crxor $dst, $dst, $dst", BrCR, []>; +let Defs = [CR1EQ], CRD = 6 in { +def CR6SET : XLForm_1_ext<19, 289, (outs), (ins), + "creqv 6, 6, 6", BrCR, + [(PPCcr6set)]>; + +def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), + "crxor 6, 6, 6", BrCR, + [(PPCcr6unset)]>; +} + // XFX-Form instructions. Instructions that deal with SPRs. // let Uses = [CTR] in { @@ -1192,7 +1208,7 @@ def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS), // // FIXME: Make this a real Pseudo instruction when the JIT switches to MC. 
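A side effect of giving the pseudos printable "#NAME" asm strings (the SELECT_CC_*, SPILL_CR/RESTORE_CR, and ATOMIC_* changes above, and MFCRpseud just below) is that an opcode which leaks through to instruction printing now identifies itself instead of emitting an empty line. A minimal sketch of flagging such opcodes, assuming an MCInstrInfo is at hand (notePseudo is a made-up helper):

  #include "llvm/MC/MCInstrInfo.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  // Report an opcode that should have been expanded before emission.
  static void notePseudo(const MCInstrInfo &MII, unsigned Opcode) {
    if (MII.get(Opcode).isPseudo())
      errs() << "pseudo reached emission: " << MII.getName(Opcode) << "\n";
  }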
def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), - "", SprMFCR>, + "#MFCRpseud", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins), @@ -1233,7 +1249,7 @@ let Uses = [RM] in { PPC970_DGroup_Single, PPC970_Unit_FPU; def FADDrtz: AForm_2<63, 21, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fadd $FRT, $FRA, $FRB", FPGeneral, + "fadd $FRT, $FRA, $FRB", FPAddSub, [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>, PPC970_DGroup_Single, PPC970_Unit_FPU; } @@ -1364,7 +1380,7 @@ def FSELS : AForm_1<63, 23, let Uses = [RM] in { def FADD : AForm_2<63, 21, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fadd $FRT, $FRA, $FRB", FPGeneral, + "fadd $FRT, $FRA, $FRB", FPAddSub, [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>; def FADDS : AForm_2<59, 21, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), @@ -1379,16 +1395,16 @@ let Uses = [RM] in { "fdivs $FRT, $FRA, $FRB", FPDivS, [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>; def FMUL : AForm_3<63, 25, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fmul $FRT, $FRA, $FRB", FPFused, - [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRB))]>; + (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC), + "fmul $FRT, $FRA, $FRC", FPFused, + [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRC))]>; def FMULS : AForm_3<59, 25, - (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), - "fmuls $FRT, $FRA, $FRB", FPGeneral, - [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>; + (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC), + "fmuls $FRT, $FRA, $FRC", FPGeneral, + [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRC))]>; def FSUB : AForm_2<63, 20, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fsub $FRT, $FRA, $FRB", FPGeneral, + "fsub $FRT, $FRA, $FRB", FPAddSub, [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>; def FSUBS : AForm_2<59, 20, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), @@ -1398,7 +1414,7 @@ let Uses = [RM] in { } let PPC970_Unit = 1 in { // FXU Operations. - def ISEL : AForm_1<31, 15, + def ISEL : AForm_4<31, 15, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index ab8bf1f..459c358 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -71,7 +71,7 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, ST.isPPC64() ? 0 : 1, ST.isPPC64() ? 0 : 1), - Subtarget(ST), TII(tii) { + Subtarget(ST), TII(tii), CRSpillFrameIdx(0) { ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX; ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX; ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX; @@ -111,10 +111,15 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return Subtarget.isPPC64() ? CSR_Darwin64_SaveList : CSR_Darwin32_SaveList; + // For 32-bit SVR4, also initialize the frame index associated with + // the CR spill slot. + if (!Subtarget.isPPC64()) + CRSpillFrameIdx = 0; + return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList; } -const unsigned* +const uint32_t* PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { if (Subtarget.isDarwinABI()) return Subtarget.isPPC64() ? 
CSR_Darwin64_RegMask : @@ -477,6 +482,31 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, MBB.erase(II); } +bool +PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, + unsigned Reg, int &FrameIdx) const { + + // For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4 + // ABI, return true to prevent allocating an additional frame slot. + // For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0 + // is arbitrary and will be subsequently ignored. For 32-bit, we must + // create exactly one stack slot and return its FrameIdx for all + // nonvolatiles. + if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) { + if (Subtarget.isPPC64()) { + FrameIdx = 0; + } else if (CRSpillFrameIdx) { + FrameIdx = CRSpillFrameIdx; + } else { + MachineFrameInfo *MFI = ((MachineFunction &)MF).getFrameInfo(); + FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); + CRSpillFrameIdx = FrameIdx; + } + return true; + } + return false; +} + void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const { @@ -566,7 +596,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // to Offset to get the correct offset. // Naked functions have stack size 0, although getStackSize may not reflect that // because we didn't call all the pieces that compute it for naked functions. - if (!MF.getFunction()->hasFnAttr(Attribute::Naked)) + if (!MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked)) Offset += MFI->getStackSize(); // If we can, encode the offset directly into the instruction. If this is a diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index 152c36d..a8fd796 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -30,6 +30,7 @@ class PPCRegisterInfo : public PPCGenRegisterInfo { std::map<unsigned, unsigned> ImmToIdxMap; const PPCSubtarget &Subtarget; const TargetInstrInfo &TII; + mutable int CRSpillFrameIdx; public: PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii); @@ -43,7 +44,7 @@ public: /// Code Generation virtual methods... 
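The hasReservedSpillSlot hook implemented above (and declared in the header below) is consumed by the prologue/epilogue inserter when it assigns spill slots to callee-saved registers. The sketch below mirrors only the call shape under that assumption; it is not the actual PrologEpilogInserter code:

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/Target/TargetRegisterInfo.h"
  using namespace llvm;

  // If the target reserves a slot for Reg (the PPC SVR4 ABI now does for
  // CR2-CR4), reuse it; otherwise fall back to a caller-created slot.
  static int chooseSpillSlot(const MachineFunction &MF,
                             const TargetRegisterInfo &TRI,
                             unsigned Reg, int DefaultFrameIdx) {
    int FrameIdx;
    if (TRI.hasReservedSpillSlot(MF, Reg, FrameIdx))
      return FrameIdx;
    return DefaultFrameIdx;
  }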
const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; - const unsigned *getCallPreservedMask(CallingConv::ID CC) const; + const uint32_t *getCallPreservedMask(CallingConv::ID CC) const; BitVector getReservedRegs(const MachineFunction &MF) const; @@ -65,6 +66,8 @@ public: int SPAdj, RegScavenger *RS) const; void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex, int SPAdj, RegScavenger *RS) const; + bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, + int &FrameIdx) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td index 6a6ccb9..660c0c3 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td @@ -40,6 +40,7 @@ def IntMulHWU : InstrItinClass; def IntMulLI : InstrItinClass; def IntRFID : InstrItinClass; def IntRotateD : InstrItinClass; +def IntRotateDI : InstrItinClass; def IntRotate : InstrItinClass; def IntShift : InstrItinClass; def IntTrapD : InstrItinClass; @@ -52,15 +53,18 @@ def LdStDCBA : InstrItinClass; def LdStDCBF : InstrItinClass; def LdStDCBI : InstrItinClass; def LdStLoad : InstrItinClass; +def LdStLoadUpd : InstrItinClass; def LdStStore : InstrItinClass; +def LdStStoreUpd : InstrItinClass; def LdStDSS : InstrItinClass; def LdStICBI : InstrItinClass; -def LdStUX : InstrItinClass; def LdStLD : InstrItinClass; +def LdStLDU : InstrItinClass; def LdStLDARX : InstrItinClass; def LdStLFD : InstrItinClass; def LdStLFDU : InstrItinClass; def LdStLHA : InstrItinClass; +def LdStLHAU : InstrItinClass; def LdStLMW : InstrItinClass; def LdStLVecX : InstrItinClass; def LdStLWA : InstrItinClass; @@ -69,6 +73,9 @@ def LdStSLBIA : InstrItinClass; def LdStSLBIE : InstrItinClass; def LdStSTD : InstrItinClass; def LdStSTDCX : InstrItinClass; +def LdStSTDU : InstrItinClass; +def LdStSTFD : InstrItinClass; +def LdStSTFDU : InstrItinClass; def LdStSTVEBX : InstrItinClass; def LdStSTWCX : InstrItinClass; def LdStSync : InstrItinClass; @@ -86,6 +93,7 @@ def SprMTSRIN : InstrItinClass; def SprRFI : InstrItinClass; def SprSC : InstrItinClass; def FPGeneral : InstrItinClass; +def FPAddSub : InstrItinClass; def FPCompare : InstrItinClass; def FPDivD : InstrItinClass; def FPDivS : InstrItinClass; @@ -110,6 +118,8 @@ include "PPCScheduleG4.td" include "PPCScheduleG4Plus.td" include "PPCScheduleG5.td" include "PPCScheduleA2.td" +include "PPCScheduleE500mc.td" +include "PPCScheduleE5500.td" //===----------------------------------------------------------------------===// // Instruction to itinerary class map - When add new opcodes to the supported @@ -171,7 +181,7 @@ include "PPCScheduleA2.td" // extsh IntSimple // extsw IntSimple // fabs FPGeneral -// fadd FPGeneral +// fadd FPAddSub // fadds FPGeneral // fcfid FPGeneral // fcmpo FPCompare @@ -201,35 +211,35 @@ include "PPCScheduleA2.td" // fsel FPGeneral // fsqrt FPSqrt // fsqrts FPSqrt -// fsub FPGeneral +// fsub FPAddSub // fsubs FPGeneral // icbi LdStICBI // isync SprISYNC // lbz LdStLoad -// lbzu LdStLoad -// lbzux LdStUX +// lbzu LdStLoadUpd +// lbzux LdStLoadUpd // lbzx LdStLoad // ld LdStLD // ldarx LdStLDARX -// ldu LdStLD -// ldux LdStLD +// ldu LdStLDU +// ldux LdStLDU // ldx LdStLD // lfd LdStLFD // lfdu LdStLFDU // lfdux LdStLFDU -// lfdx LdStLFDU -// lfs LdStLFDU +// lfdx LdStLFD +// lfs LdStLFD // lfsu LdStLFDU // lfsux LdStLFDU -// lfsx LdStLFDU +// lfsx LdStLFD // lha 
LdStLHA -// lhau LdStLHA -// lhaux LdStLHA +// lhau LdStLHAU +// lhaux LdStLHAU // lhax LdStLHA // lhbrx LdStLoad // lhz LdStLoad -// lhzu LdStLoad -// lhzux LdStUX +// lhzu LdStLoadUpd +// lhzux LdStLoadUpd // lhzx LdStLoad // lmw LdStLMW // lswi LdStLMW @@ -243,12 +253,12 @@ include "PPCScheduleA2.td" // lvxl LdStLVecX // lwa LdStLWA // lwarx LdStLWARX -// lwaux LdStLHA +// lwaux LdStLHAU // lwax LdStLHA // lwbrx LdStLoad // lwz LdStLoad -// lwzu LdStLoad -// lwzux LdStUX +// lwzu LdStLoadUpd +// lwzux LdStLoadUpd // lwzx LdStLoad // mcrf BrMCR // mcrfs FPGeneral @@ -292,10 +302,10 @@ include "PPCScheduleA2.td" // rfid IntRFID // rldcl IntRotateD // rldcr IntRotateD -// rldic IntRotateD -// rldicl IntRotateD -// rldicr IntRotateD -// rldimi IntRotateD +// rldic IntRotateDI +// rldicl IntRotateDI +// rldicr IntRotateDI +// rldimi IntRotateDI // rlwimi IntRotate // rlwinm IntGeneral // rlwnm IntGeneral @@ -305,33 +315,33 @@ include "PPCScheduleA2.td" // sld IntRotateD // slw IntGeneral // srad IntRotateD -// sradi IntRotateD +// sradi IntRotateDI // sraw IntShift // srawi IntShift // srd IntRotateD // srw IntGeneral // stb LdStStore -// stbu LdStStore -// stbux LdStStore +// stbu LdStStoreUpd +// stbux LdStStoreUpd // stbx LdStStore // std LdStSTD // stdcx. LdStSTDCX -// stdu LdStSTD -// stdux LdStSTD +// stdu LdStSTDU +// stdux LdStSTDU // stdx LdStSTD -// stfd LdStUX -// stfdu LdStUX -// stfdux LdStUX -// stfdx LdStUX -// stfiwx LdStUX -// stfs LdStUX -// stfsu LdStUX -// stfsux LdStUX -// stfsx LdStUX +// stfd LdStSTFD +// stfdu LdStSTFDU +// stfdux LdStSTFDU +// stfdx LdStSTFD +// stfiwx LdStSTFD +// stfs LdStSTFD +// stfsu LdStSTFDU +// stfsux LdStSTFDU +// stfsx LdStSTFD // sth LdStStore // sthbrx LdStStore -// sthu LdStStore -// sthux LdStStore +// sthu LdStStoreUpd +// sthux LdStStoreUpd // sthx LdStStore // stmw LdStLMW // stswi LdStLMW @@ -344,8 +354,8 @@ include "PPCScheduleA2.td" // stw LdStStore // stwbrx LdStStore // stwcx. 
LdStSTWCX -// stwu LdStStore -// stwux LdStStore +// stwu LdStStoreUpd +// stwux LdStStoreUpd // stwx LdStStore // subf IntGeneral // subfc IntGeneral diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td index cd0fb70..37b6eac 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td @@ -288,6 +288,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<2, [LWB]>], [9, 5], [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoadUpd , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [9, 5], + [GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStStore , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -297,6 +306,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<2, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -306,7 +324,7 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, - InstrItinData<LdStUX , [InstrStage<1, [IFTH1, IFTH2]>, + InstrItinData<LdStSTFD , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, InstrStage<1, [LRACC]>, @@ -315,6 +333,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5, 5], [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5, 5], + [NoBypass, GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStLFD , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -342,6 +369,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStLMW , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -371,6 +407,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<2, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDU , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1]>, @@ -537,6 +582,19 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [FWB]>], [10, 4, 4], [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPAddSub , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + 
InstrStage<1, [FRACC]>, + InstrStage<1, [FEXE1]>, + InstrStage<1, [FEXE2]>, + InstrStage<1, [FEXE3]>, + InstrStage<1, [FEXE4]>, + InstrStage<1, [FEXE5]>, + InstrStage<1, [FEXE6]>, + InstrStage<1, [FWB]>], + [10, 4, 4], + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, InstrItinData<FPCompare , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td index 4d4a5d0..ba63b5c 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td @@ -181,6 +181,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [10, 7, 7], [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotateDI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, InstrItinData<IntShift , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -302,7 +313,18 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7], [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLD , [InstrStage<4, + InstrItinData<LdStLoadUpd , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLDU , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7]>, @@ -324,6 +346,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [13, 7], [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStICBI , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -335,7 +368,7 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7], [NoBypass, GPR_Bypass]>, - InstrItinData<LdStUX , [InstrStage<4, + InstrItinData<LdStSTFD , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7]>, @@ -346,6 +379,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7, 7], [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, 
IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [NoBypass, FPR_Bypass, FPR_Bypass]>, InstrItinData<LdStLFD , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -379,6 +423,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStLMW , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -412,6 +467,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [13, 7], [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTDU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStSTDCX , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -593,6 +659,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], [15, 7, 7], [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPAddSub , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [15, 7, 7], + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, InstrItinData<FPCompare , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td new file mode 100644 index 0000000..9bb779a --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td @@ -0,0 +1,265 @@ +//===-- PPCScheduleE500mc.td - e500mc Scheduling Defs ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the Freescale e500mc 32-bit +// Power processor. +// +// All information is derived from the "e500mc Core Reference Manual", +// Freescale Document Number E500MCRM, Rev. 1, 03/2012. 
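One way to read the itineraries in this new file: the "Repeat rate" comments fall out of the stage cycle counts, e.g. IntDivW below holds the CFX_DivBypass unit for 14 cycles (a representative value for the 4..35 range its comment notes), so a second divide cannot enter the bypass path until it frees up. Summing a class's stage cycles is essentially what InstrItineraryData::getStageLatency computes; a minimal sketch, with stageOccupancy as an illustrative name:

  #include "llvm/MC/MCInstrItineraries.h"
  using namespace llvm;

  // Total cycles an instruction of this class keeps its stages reserved.
  static unsigned stageOccupancy(const InstrItineraryData &Itins,
                                 unsigned SchedClass) {
    unsigned Cycles = 0;
    for (const InstrStage *IS = Itins.beginStage(SchedClass),
                          *E = Itins.endStage(SchedClass); IS != E; ++IS)
      Cycles += IS->getCycles();
    return Cycles;
  }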
+// +//===----------------------------------------------------------------------===// +// Relevant functional units in the Freescale e500mc core: +// +// * Decode & Dispatch +// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue +// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ). +def DIS0 : FuncUnit; // Dispatch stage - insn 1 +def DIS1 : FuncUnit; // Dispatch stage - insn 2 + +// * Execute +// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX. +// Some instructions can only execute in SFX0 but not SFX1. +// The CFX has a bypass path, allowing non-divide instructions to execute +// while a divide instruction is executed. +def SFX0 : FuncUnit; // Simple unit 0 +def SFX1 : FuncUnit; // Simple unit 1 +def BU : FuncUnit; // Branch unit +def CFX_DivBypass + : FuncUnit; // CFX divide bypass path +def CFX_0 : FuncUnit; // CFX pipeline +def LSU_0 : FuncUnit; // LSU pipeline +def FPU_0 : FuncUnit; // FPU pipeline + +def PPCE500mcItineraries : ProcessorItineraries< + [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0], + [CR_Bypass, GPR_Bypass, FPR_Bypass], [ + InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 1, 1], // Latency = 1 or 2 + [CR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<14, [CFX_DivBypass]>], + [17, 1, 1], // Latency=4..35, Repeat= 4..35 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<8, [FPU_0]>], + [11], // Latency = 8 + [FPR_Bypass]>, + InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<8, [FPU_0]>], + [11, 1, 1], // Latency = 8 + [NoBypass, NoBypass, NoBypass]>, + InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0]>], + [5, 1], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [4, 1], // Latency = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [4, 1, 1], // Latency = 1 + [CR_Bypass, CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [4, 1], // Latency = 1 + [CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCRX , 
[InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [CR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 1, 1], // Latency = 4 + [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 1, 1], // Latency = 4 + [FPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 1], // Latency = r+3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<3, [LSU_0]>], + [6, 1, 1], // Latency = 3, Repeat rate = 3 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>]>, + InstrItinData<SprMFSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [7, 1], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0, SFX1]>], + [5, 1], // Latency = 2, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMTSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0]>], + [5, 1], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0], 0>]>, + InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<5, [SFX0]>], + 
[8, 1], + [GPR_Bypass, CR_Bypass]>, + InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [GPR_Bypass, CR_Bypass]>, + InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [CR_Bypass, GPR_Bypass]>, + InstrItinData<SprMTSRIN , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0]>], + [4, 1], + [NoBypass, GPR_Bypass]>, + InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [FPU_0]>], + [11, 1, 1], // Latency = 8, Repeat rate = 2 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [FPU_0]>], + [13, 1, 1], // Latency = 10, Repeat rate = 4 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [FPU_0]>], + [11, 1, 1], // Latency = 8, Repeat rate = 2 + [CR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<68, [FPU_0]>], + [71, 1, 1], // Latency = 68, Repeat rate = 68 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<38, [FPU_0]>], + [41, 1, 1], // Latency = 38, Repeat rate = 38 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [FPU_0]>], + [13, 1, 1, 1], // Latency = 10, Repeat rate = 4 + [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<38, [FPU_0]>], + [41, 1], // Latency = 38, Repeat rate = 38 + [FPR_Bypass, FPR_Bypass]> +]>; + +//===----------------------------------------------------------------------===// +// e500mc machine model for scheduling and other instruction cost heuristics. + +def PPCE500mcModel : SchedMachineModel { + let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. + let MinLatency = -1; // OperandCycles are interpreted as MinLatency. + let LoadLatency = 5; // Optimistic load latency assuming bypass. + // This is overridden by OperandCycles if the + // Itineraries are queried instead. + + let Itineraries = PPCE500mcItineraries; +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td new file mode 100644 index 0000000..d7e11ac --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td @@ -0,0 +1,309 @@ +//===-- PPCScheduleE5500.td - e5500 Scheduling Defs --------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the Freescale e5500 64-bit +// Power processor. +// +// All information is derived from the "e5500 Core Reference Manual", +// Freescale Document Number e5500RM, Rev. 1, 03/2012.
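+// +// [Editor's note, not from the manual:] the operand-cycle lists in this file +// and in PPCScheduleE500mc.td appear to fold the front-end depth into each +// def cycle: results are written as (latency + 4) here and as (latency + 3) +// on the e500mc, with register reads at cycle 2 and 1 respectively. A +// one-cycle add on either simple unit is therefore encoded as: +// InstrItinData<IntSimple, [InstrStage<1, [DIS0, DIS1], 0>, +// InstrStage<1, [SFX0, SFX1]>], +// [5, 2, 2], [GPR_Bypass, GPR_Bypass, GPR_Bypass]>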
+// +//===----------------------------------------------------------------------===// +// Relevant functional units in the Freescale e5500 core +// (These are the same as for the e500mc) +// +// * Decode & Dispatch +// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue +// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ). +// def DIS0 : FuncUnit; +// def DIS1 : FuncUnit; + +// * Execute +// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX. +// The CFX has a bypass path, allowing non-divide instructions to execute +// while a divide instruction is being executed. +// def SFX0 : FuncUnit; // Simple unit 0 +// def SFX1 : FuncUnit; // Simple unit 1 +// def BU : FuncUnit; // Branch unit +// def CFX_DivBypass +// : FuncUnit; // CFX divide bypass path +// def CFX_0 : FuncUnit; // CFX pipeline stage 0 + +def CFX_1 : FuncUnit; // CFX pipeline stage 1 + +// def LSU_0 : FuncUnit; // LSU pipeline +// def FPU_0 : FuncUnit; // FPU pipeline + + +def PPCE5500Itineraries : ProcessorItineraries< + [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, CFX_1, + LSU_0, FPU_0], + [CR_Bypass, GPR_Bypass, FPR_Bypass], [ + InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [6, 2, 2], // Latency = 1 or 2 + [CR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<26, [CFX_DivBypass]>], + [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<16, [CFX_DivBypass]>], + [20, 2, 2], // Latency= 4..16, Repeat rate= 4..16 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11], // Latency = 7, Repeat rate = 1 + [FPR_Bypass]>, + InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<7, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 7 + [NoBypass, NoBypass, NoBypass]>, + InstrItinData<IntMulHD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<2, [CFX_1]>], + [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<1, [CFX_1]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<1, [CFX_1]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<2, [CFX_1]>], + [8, 2, 2], // Latency = 4 or 5, Repeat = 2 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotateD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0, SFX1]>], + [6, 2, 2], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, 
+ InstrItinData<IntRotateDI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0, SFX1]>], + [6, 2, 2], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0]>], + [6, 2], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [5, 2], // Latency = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [5, 2, 2], // Latency = 1 + [CR_Bypass, CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [5, 2], // Latency = 1 + [CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [5, 2, 2], // Latency = 1 + [CR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLDARX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<3, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [FPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + 
InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [FPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [LSU_0]>], + [8, 2], // Latency = r+3, Repeat rate = r+3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<3, [LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 3 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>]>, + InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [CFX_0]>], + [6, 2], // Latency = 2, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0], 0>]>, + InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<5, [CFX_0]>], + [9, 2], // Latency = 5, Repeat rate = 5 + [GPR_Bypass, CR_Bypass]>, + InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [8, 2], // Latency = 4, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [5], // Latency = 1, Repeat rate = 1 + [GPR_Bypass]>, + InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [CFX_0]>], + [8, 2], // Latency = 4, Repeat rate = 4 + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5], // Latency = 1, Repeat rate = 1 + [GPR_Bypass]>, + InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [CR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<31, [FPU_0]>], + [39, 2, 2], // Latency = 35, Repeat rate = 31 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<16, [FPU_0]>], + [24, 2, 2], // Latency = 20, Repeat rate = 16 + [FPR_Bypass, FPR_Bypass, 
FPR_Bypass]>, + InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2, 2], // Latency = 7, Repeat rate = 1 + [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [FPU_0]>], + [12, 2], // Latency = 8, Repeat rate = 2 + [FPR_Bypass, FPR_Bypass]> +]>; + +//===----------------------------------------------------------------------===// +// e5500 machine model for scheduling and other instruction cost heuristics. + +def PPCE5500Model : SchedMachineModel { + let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. + let MinLatency = -1; // OperandCycles are interpreted as MinLatency. + let LoadLatency = 6; // Optimistic load latency assuming bypass. + // This is overridden by OperandCycles if the + // Itineraries are queried instead. + + let Itineraries = PPCE5500Itineraries; +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td index 61e89ed..72a0a39 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td @@ -34,12 +34,16 @@ def G3Itineraries : ProcessorItineraries< InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>, InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTWCX , [InstrStage<8, [SLU]>]>, @@ -58,6 +62,7 @@ def G3Itineraries : ProcessorItineraries< InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>, InstrItinData<SprSC , [InstrStage<2, [SRU]>]>, InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>, + InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>, InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>, InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>, InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td index e19ddfa..fc9120d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td @@ -33,13 +33,17 @@ def G4Itineraries : ProcessorItineraries< InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLHAU ,
[InstrStage<2, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>, InstrItinData<LdStLVecX , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, @@ -60,6 +64,7 @@ def G4Itineraries : ProcessorItineraries< InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>, InstrItinData<SprSC , [InstrStage<2, [SRU]>]>, InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>, + InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>, InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>, InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>, InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td index e7446cb..a4e82ce 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -36,19 +36,24 @@ def G4PlusItineraries : ProcessorItineraries< InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>, - InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTFDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<4, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<4, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLHAU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<37, [SLU]>]>, InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLWA , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTDCX , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTVEBX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTWCX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>, @@ -66,6 +71,7 @@ def G4PlusItineraries : ProcessorItineraries< InstrItinData<SprRFI , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, InstrItinData<SprSC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>, InstrItinData<FPGeneral , [InstrStage<5, [FPU1]>]>, + InstrItinData<FPAddSub , [InstrStage<5, [FPU1]>]>, InstrItinData<FPCompare , [InstrStage<5, [FPU1]>]>, InstrItinData<FPDivD , [InstrStage<35, [FPU1]>]>, InstrItinData<FPDivS , [InstrStage<21, [FPU1]>]>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td index 1371499..7c02ea0 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td @@ -27,6 +27,7 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<IntMulLI , [InstrStage<4, [IU1, IU2]>]>, InstrItinData<IntRFID , [InstrStage<1, [IU2]>]>, InstrItinData<IntRotateD , [InstrStage<2, [IU1, IU2]>]>, + InstrItinData<IntRotateDI , [InstrStage<2, [IU1, IU2]>]>, InstrItinData<IntRotate , [InstrStage<4, [IU1, IU2]>]>, InstrItinData<IntShift , [InstrStage<2, [IU1, IU2]>]>, InstrItinData<IntTrapD , [InstrStage<1, [IU1, IU2]>]>, @@ -37,15 +38,20 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>, InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, + 
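// [Editor's note:] this hunk and the matching G3, G4, and G4Plus hunks give + // each newly split *Upd itinerary class the same stage occupancy as its + // base class; these older models track unit occupancy only, with no + // operand cycles or micro-op counts. +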
InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>, - InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<4, [SLU]>]>, + InstrItinData<LdStSTFDU , [InstrStage<4, [SLU]>]>, InstrItinData<LdStLD , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLDARX , [InstrStage<11, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<5, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<5, [SLU]>]>, + InstrItinData<LdStLHAU , [InstrStage<5, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<64, [SLU]>]>, InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLWA , [InstrStage<5, [SLU]>]>, @@ -53,6 +59,7 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<LdStSLBIA , [InstrStage<40, [SLU]>]>, // needs work InstrItinData<LdStSLBIE , [InstrStage<2, [SLU]>]>, InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTDCX , [InstrStage<11, [SLU]>]>, InstrItinData<LdStSTVEBX , [InstrStage<5, [SLU]>]>, InstrItinData<LdStSTWCX , [InstrStage<11, [SLU]>]>, @@ -69,6 +76,7 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<SprMTSPR , [InstrStage<8, [IU2]>]>, InstrItinData<SprSC , [InstrStage<1, [IU2]>]>, InstrItinData<FPGeneral , [InstrStage<6, [FPU1, FPU2]>]>, + InstrItinData<FPAddSub , [InstrStage<6, [FPU1, FPU2]>]>, InstrItinData<FPCompare , [InstrStage<8, [FPU1, FPU2]>]>, InstrItinData<FPDivD , [InstrStage<33, [FPU1, FPU2]>]>, InstrItinData<FPDivS , [InstrStage<33, [FPU1, FPU2]>]>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index bb193ac..9c8cb92 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -54,19 +54,26 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, CPUName = sys::getHostCPUName(); #endif - // Parse features string. - ParseSubtargetFeatures(CPUName, FS); - // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUName); + // Make sure 64-bit features are available when CPUName is generic + std::string FullFS = FS; + // If we are generating code for ppc64, verify that options make sense. if (is64Bit) { Has64BitSupport = true; // Silently force 64-bit register use on ppc64. Use64BitRegs = true; + if (!FullFS.empty()) + FullFS = "+64bit," + FullFS; + else + FullFS = "+64bit"; } - + + // Parse features string. + ParseSubtargetFeatures(CPUName, FullFS); + // If the user requested use of 64-bit regs, but the cpu selected doesn't // support it, ignore.
if (use64BitRegs() && !has64BitSupport()) diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h index 0207c83..b9e22f4 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -33,32 +33,34 @@ namespace PPC { enum { DIR_NONE, DIR_32, - DIR_440, - DIR_601, - DIR_602, - DIR_603, + DIR_440, + DIR_601, + DIR_602, + DIR_603, DIR_7400, - DIR_750, - DIR_970, + DIR_750, + DIR_970, DIR_A2, + DIR_E500mc, + DIR_E5500, DIR_PWR6, DIR_PWR7, - DIR_64 + DIR_64 }; } class GlobalValue; class TargetMachine; - + class PPCSubtarget : public PPCGenSubtargetInfo { protected: /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned StackAlignment; - + /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; - + /// Which cpu directive was used. unsigned DarwinDirective; @@ -74,7 +76,7 @@ protected: bool IsBookE; bool HasLazyResolverStubs; bool IsJITCodeModel; - + /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; @@ -84,11 +86,11 @@ public: /// PPCSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64Bit); - - /// ParseSubtargetFeatures - Parses features string setting specified + + /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - + /// SetJITMode - This is called to inform the subtarget info that we are /// producing code for the JIT. void SetJITMode(); @@ -97,20 +99,27 @@ public: /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. unsigned getStackAlignment() const { return StackAlignment; } - + /// getDarwinDirective - Returns the -m directive specified for the cpu. /// unsigned getDarwinDirective() const { return DarwinDirective; } - - /// getInstrItins - Return the instruction itineraies based on subtarget + + /// getInstrItins - Return the instruction itineraries based on subtarget /// selection. const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } - /// getTargetDataString - Return the pointer size and type alignment + /// getDataLayoutString - Return the pointer size and type alignment /// properties of this subtarget. - const char *getTargetDataString() const { + const char *getDataLayoutString() const { // Note, the alignment values for f64 and i64 on ppc64 in Darwin // documentation are wrong; these are correct (i.e. "what gcc does"). + if (isPPC64() && isSVR4ABI()) { + if (TargetTriple.getOS() == llvm::Triple::FreeBSD) + return "E-p:64:64-f64:64:64-i64:64:64-f128:64:64-v128:128:128-n32:64"; + else + return "E-p:64:64-f64:64:64-i64:64:64-f128:128:128-v128:128:128-n32:64"; + } + return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64" : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32"; } @@ -118,22 +127,22 @@ public: /// isPPC64 - Return true if we are generating code for 64-bit pointer mode. /// bool isPPC64() const { return IsPPC64; } - + /// has64BitSupport - Return true if the selected CPU supports 64-bit /// instructions, regardless of whether we are in 32-bit or 64-bit mode.
bool has64BitSupport() const { return Has64BitSupport; } - + /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit /// registers in 32-bit mode when possible. This can only be true if /// has64BitSupport() returns true. bool use64BitRegs() const { return Use64BitRegs; } - + /// hasLazyResolverStub - Return true if accesses to the specified global have /// to go through a dyld lazy resolution stub. This means that an extra load /// is required to get the address of the global. - bool hasLazyResolverStub(const GlobalValue *GV, + bool hasLazyResolverStub(const GlobalValue *GV, const TargetMachine &TM) const; - + // isJITCodeModel - True if we're generating code for the JIT bool isJITCodeModel() const { return IsJITCodeModel; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index 9805112..3fc977e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -40,10 +40,11 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, bool is64Bit) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, is64Bit), - DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), + DL(Subtarget.getDataLayoutString()), InstrInfo(*this), FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()) { + InstrItins(Subtarget.getInstrItineraryData()), + STTI(&TLInfo), VTTI(&TLInfo) { // The binutils for the BG/P are too old for CFI. if (Subtarget.isBGP()) diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h index 7da2b0c..c168433 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h @@ -21,7 +21,8 @@ #include "PPCISelLowering.h" #include "PPCSelectionDAGInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetTransformImpl.h" +#include "llvm/DataLayout.h" namespace llvm { @@ -29,13 +30,15 @@ namespace llvm { /// class PPCTargetMachine : public LLVMTargetMachine { PPCSubtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment + const DataLayout DL; // Calculates type size & alignment PPCInstrInfo InstrInfo; PPCFrameLowering FrameLowering; PPCJITInfo JITInfo; PPCTargetLowering TLInfo; PPCSelectionDAGInfo TSInfo; InstrItineraryData InstrItins; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: PPCTargetMachine(const Target &T, StringRef TT, @@ -58,11 +61,17 @@ public: return &InstrInfo.getRegisterInfo(); } - virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual const DataLayout *getDataLayout() const { return &DL; } virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; } virtual const InstrItineraryData *getInstrItineraryData() const { return &InstrItins; } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp index 1c5c89e..716c79f 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp +++
b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp @@ -20,7 +20,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 79f7ebd..8e5619e 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -637,7 +637,7 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType()); Type *ElementTy = Ty->getElementType(); - return getTargetData()->getTypeAllocSize(ElementTy); + return getDataLayout()->getTypeAllocSize(ElementTy); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td index 15541ef..e64c140 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td +++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td @@ -129,7 +129,7 @@ def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet, [SDNPHasChain, SDNPOptInGlue]>; def flushw : SDNode<"SPISD::FLUSHW", SDTNone, - [SDNPHasChain]>; + [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>; def getPCX : Operand<i32> { let PrintMethod = "printGetPCX"; diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp index 9ee12ed..45c9624 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp @@ -33,10 +33,10 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, bool is64bit) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, is64bit), - DataLayout(Subtarget.getDataLayout()), + DL(Subtarget.getDataLayout()), InstrInfo(Subtarget), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget) { + FrameLowering(Subtarget), STTI(&TLInfo), VTTI(&TLInfo) { } namespace { diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h index b2cc624..0fbe2d7 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h +++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h @@ -20,18 +20,21 @@ #include "SparcSelectionDAGInfo.h" #include "SparcSubtarget.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetTransformImpl.h" namespace llvm { class SparcTargetMachine : public LLVMTargetMachine { SparcSubtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment + const DataLayout DL; // Calculates type size & alignment SparcInstrInfo InstrInfo; SparcTargetLowering TLInfo; SparcSelectionDAGInfo TSInfo; SparcFrameLowering FrameLowering; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: SparcTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -52,7 +55,13 @@ public: virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const { return &TSInfo; } - virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual const ScalarTargetTransformInfo 
*getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } + virtual const DataLayout *getDataLayout() const { return &DL; } // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); diff --git a/contrib/llvm/lib/Target/Target.cpp b/contrib/llvm/lib/Target/Target.cpp index a2b83bc..393178a 100644 --- a/contrib/llvm/lib/Target/Target.cpp +++ b/contrib/llvm/lib/Target/Target.cpp @@ -16,7 +16,7 @@ #include "llvm-c/Initialization.h" #include "llvm/InitializePasses.h" #include "llvm/PassManager.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/LLVMContext.h" #include <cstring> @@ -24,8 +24,9 @@ using namespace llvm; void llvm::initializeTarget(PassRegistry &Registry) { - initializeTargetDataPass(Registry); + initializeDataLayoutPass(Registry); initializeTargetLibraryInfoPass(Registry); + initializeTargetTransformInfoPass(Registry); } void LLVMInitializeTarget(LLVMPassRegistryRef R) { @@ -33,11 +34,11 @@ void LLVMInitializeTarget(LLVMPassRegistryRef R) { } LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep) { - return wrap(new TargetData(StringRep)); + return wrap(new DataLayout(StringRep)); } void LLVMAddTargetData(LLVMTargetDataRef TD, LLVMPassManagerRef PM) { - unwrap(PM)->add(new TargetData(*unwrap(TD))); + unwrap(PM)->add(new DataLayout(*unwrap(TD))); } void LLVMAddTargetLibraryInfo(LLVMTargetLibraryInfoRef TLI, @@ -55,13 +56,21 @@ LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef TD) { } unsigned LLVMPointerSize(LLVMTargetDataRef TD) { - return unwrap(TD)->getPointerSize(); + return unwrap(TD)->getPointerSize(0); +} + +unsigned LLVMPointerSizeForAS(LLVMTargetDataRef TD, unsigned AS) { + return unwrap(TD)->getPointerSize(AS); } LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef TD) { return wrap(unwrap(TD)->getIntPtrType(getGlobalContext())); } +LLVMTypeRef LLVMIntPtrTypeForAS(LLVMTargetDataRef TD, unsigned AS) { + return wrap(unwrap(TD)->getIntPtrType(getGlobalContext(), AS)); +} + unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef TD, LLVMTypeRef Ty) { return unwrap(TD)->getTypeSizeInBits(unwrap(Ty)); } diff --git a/contrib/llvm/lib/Target/TargetData.cpp b/contrib/llvm/lib/Target/TargetData.cpp deleted file mode 100644 index cc6dc1e..0000000 --- a/contrib/llvm/lib/Target/TargetData.cpp +++ /dev/null @@ -1,663 +0,0 @@ -//===-- TargetData.cpp - Data size & alignment routines --------------------==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines target properties related to datatype size/offset/alignment -// information. -// -// This structure should be created once, filled in if the defaults are not -// correct and then passed around by const&. None of the members functions -// require modification to the object. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Target/TargetData.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Mutex.h" -#include "llvm/ADT/DenseMap.h" -#include <algorithm> -#include <cstdlib> -using namespace llvm; - -// Handle the Pass registration stuff necessary to use TargetData's. - -// Register the default SparcV9 implementation... -INITIALIZE_PASS(TargetData, "targetdata", "Target Data Layout", false, true) -char TargetData::ID = 0; - -//===----------------------------------------------------------------------===// -// Support for StructLayout -//===----------------------------------------------------------------------===// - -StructLayout::StructLayout(StructType *ST, const TargetData &TD) { - assert(!ST->isOpaque() && "Cannot get layout of opaque structs"); - StructAlignment = 0; - StructSize = 0; - NumElements = ST->getNumElements(); - - // Loop over each of the elements, placing them in memory. - for (unsigned i = 0, e = NumElements; i != e; ++i) { - Type *Ty = ST->getElementType(i); - unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty); - - // Add padding if necessary to align the data element properly. - if ((StructSize & (TyAlign-1)) != 0) - StructSize = TargetData::RoundUpAlignment(StructSize, TyAlign); - - // Keep track of maximum alignment constraint. - StructAlignment = std::max(TyAlign, StructAlignment); - - MemberOffsets[i] = StructSize; - StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item - } - - // Empty structures have alignment of 1 byte. - if (StructAlignment == 0) StructAlignment = 1; - - // Add padding to the end of the struct so that it could be put in an array - // and all array elements would be aligned correctly. - if ((StructSize & (StructAlignment-1)) != 0) - StructSize = TargetData::RoundUpAlignment(StructSize, StructAlignment); -} - - -/// getElementContainingOffset - Given a valid offset into the structure, -/// return the structure index that contains it. -unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const { - const uint64_t *SI = - std::upper_bound(&MemberOffsets[0], &MemberOffsets[NumElements], Offset); - assert(SI != &MemberOffsets[0] && "Offset not in structure type!"); - --SI; - assert(*SI <= Offset && "upper_bound didn't work"); - assert((SI == &MemberOffsets[0] || *(SI-1) <= Offset) && - (SI+1 == &MemberOffsets[NumElements] || *(SI+1) > Offset) && - "Upper bound didn't work!"); - - // Multiple fields can have the same offset if any of them are zero sized. - // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop - // at the i32 element, because it is the last element at that offset. This is - // the right one to return, because anything after it will have a higher - // offset, implying that this element is non-empty. 
- return SI-&MemberOffsets[0]; -} - -//===----------------------------------------------------------------------===// -// TargetAlignElem, TargetAlign support -//===----------------------------------------------------------------------===// - -TargetAlignElem -TargetAlignElem::get(AlignTypeEnum align_type, unsigned abi_align, - unsigned pref_align, uint32_t bit_width) { - assert(abi_align <= pref_align && "Preferred alignment worse than ABI!"); - TargetAlignElem retval; - retval.AlignType = align_type; - retval.ABIAlign = abi_align; - retval.PrefAlign = pref_align; - retval.TypeBitWidth = bit_width; - return retval; -} - -bool -TargetAlignElem::operator==(const TargetAlignElem &rhs) const { - return (AlignType == rhs.AlignType - && ABIAlign == rhs.ABIAlign - && PrefAlign == rhs.PrefAlign - && TypeBitWidth == rhs.TypeBitWidth); -} - -const TargetAlignElem -TargetData::InvalidAlignmentElem = { (AlignTypeEnum)0xFF, 0, 0, 0 }; - -//===----------------------------------------------------------------------===// -// TargetData Class Implementation -//===----------------------------------------------------------------------===// - -/// getInt - Get an integer ignoring errors. -static int getInt(StringRef R) { - int Result = 0; - R.getAsInteger(10, Result); - return Result; -} - -void TargetData::init() { - initializeTargetDataPass(*PassRegistry::getPassRegistry()); - - LayoutMap = 0; - LittleEndian = false; - PointerMemSize = 8; - PointerABIAlign = 8; - PointerPrefAlign = PointerABIAlign; - StackNaturalAlign = 0; - - // Default alignments - setAlignment(INTEGER_ALIGN, 1, 1, 1); // i1 - setAlignment(INTEGER_ALIGN, 1, 1, 8); // i8 - setAlignment(INTEGER_ALIGN, 2, 2, 16); // i16 - setAlignment(INTEGER_ALIGN, 4, 4, 32); // i32 - setAlignment(INTEGER_ALIGN, 4, 8, 64); // i64 - setAlignment(FLOAT_ALIGN, 2, 2, 16); // half - setAlignment(FLOAT_ALIGN, 4, 4, 32); // float - setAlignment(FLOAT_ALIGN, 8, 8, 64); // double - setAlignment(FLOAT_ALIGN, 16, 16, 128); // ppcf128, quad, ... - setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32, v1i64, ... - setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ... - setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct -} - -std::string TargetData::parseSpecifier(StringRef Desc, TargetData *td) { - - if (td) - td->init(); - - while (!Desc.empty()) { - std::pair<StringRef, StringRef> Split = Desc.split('-'); - StringRef Token = Split.first; - Desc = Split.second; - - if (Token.empty()) - continue; - - Split = Token.split(':'); - StringRef Specifier = Split.first; - Token = Split.second; - - assert(!Specifier.empty() && "Can't be empty here"); - - switch (Specifier[0]) { - case 'E': - if (td) - td->LittleEndian = false; - break; - case 'e': - if (td) - td->LittleEndian = true; - break; - case 'p': { - // Pointer size. - Split = Token.split(':'); - int PointerMemSizeBits = getInt(Split.first); - if (PointerMemSizeBits < 0 || PointerMemSizeBits % 8 != 0) - return "invalid pointer size, must be a positive 8-bit multiple"; - if (td) - td->PointerMemSize = PointerMemSizeBits / 8; - - // Pointer ABI alignment. - Split = Split.second.split(':'); - int PointerABIAlignBits = getInt(Split.first); - if (PointerABIAlignBits < 0 || PointerABIAlignBits % 8 != 0) { - return "invalid pointer ABI alignment, " - "must be a positive 8-bit multiple"; - } - if (td) - td->PointerABIAlign = PointerABIAlignBits / 8; - - // Pointer preferred alignment. 
- Split = Split.second.split(':'); - int PointerPrefAlignBits = getInt(Split.first); - if (PointerPrefAlignBits < 0 || PointerPrefAlignBits % 8 != 0) { - return "invalid pointer preferred alignment, " - "must be a positive 8-bit multiple"; - } - if (td) { - td->PointerPrefAlign = PointerPrefAlignBits / 8; - if (td->PointerPrefAlign == 0) - td->PointerPrefAlign = td->PointerABIAlign; - } - break; - } - case 'i': - case 'v': - case 'f': - case 'a': - case 's': { - AlignTypeEnum AlignType; - char field = Specifier[0]; - switch (field) { - default: - case 'i': AlignType = INTEGER_ALIGN; break; - case 'v': AlignType = VECTOR_ALIGN; break; - case 'f': AlignType = FLOAT_ALIGN; break; - case 'a': AlignType = AGGREGATE_ALIGN; break; - case 's': AlignType = STACK_ALIGN; break; - } - int Size = getInt(Specifier.substr(1)); - if (Size < 0) { - return std::string("invalid ") + field + "-size field, " - "must be positive"; - } - - Split = Token.split(':'); - int ABIAlignBits = getInt(Split.first); - if (ABIAlignBits < 0 || ABIAlignBits % 8 != 0) { - return std::string("invalid ") + field +"-abi-alignment field, " - "must be a positive 8-bit multiple"; - } - unsigned ABIAlign = ABIAlignBits / 8; - - Split = Split.second.split(':'); - - int PrefAlignBits = getInt(Split.first); - if (PrefAlignBits < 0 || PrefAlignBits % 8 != 0) { - return std::string("invalid ") + field +"-preferred-alignment field, " - "must be a positive 8-bit multiple"; - } - unsigned PrefAlign = PrefAlignBits / 8; - if (PrefAlign == 0) - PrefAlign = ABIAlign; - - if (td) - td->setAlignment(AlignType, ABIAlign, PrefAlign, Size); - break; - } - case 'n': // Native integer types. - Specifier = Specifier.substr(1); - do { - int Width = getInt(Specifier); - if (Width <= 0) { - return std::string("invalid native integer size \'") + Specifier.str() + - "\', must be a positive integer."; - } - if (td && Width != 0) - td->LegalIntWidths.push_back(Width); - Split = Token.split(':'); - Specifier = Split.first; - Token = Split.second; - } while (!Specifier.empty() || !Token.empty()); - break; - case 'S': { // Stack natural alignment. - int StackNaturalAlignBits = getInt(Specifier.substr(1)); - if (StackNaturalAlignBits < 0 || StackNaturalAlignBits % 8 != 0) { - return "invalid natural stack alignment (S-field), " - "must be a positive 8-bit multiple"; - } - if (td) - td->StackNaturalAlign = StackNaturalAlignBits / 8; - break; - } - default: - break; - } - } - - return ""; -} - -/// Default ctor. -/// -/// @note This has to exist, because this is a pass, but it should never be -/// used. -TargetData::TargetData() : ImmutablePass(ID) { - report_fatal_error("Bad TargetData ctor used. " - "Tool did not specify a TargetData to use?"); -} - -TargetData::TargetData(const Module *M) - : ImmutablePass(ID) { - std::string errMsg = parseSpecifier(M->getDataLayout(), this); - assert(errMsg == "" && "Module M has malformed target data layout string."); - (void)errMsg; -} - -void -TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align, - unsigned pref_align, uint32_t bit_width) { - assert(abi_align <= pref_align && "Preferred alignment worse than ABI!"); - for (unsigned i = 0, e = Alignments.size(); i != e; ++i) { - if (Alignments[i].AlignType == align_type && - Alignments[i].TypeBitWidth == bit_width) { - // Update the abi, preferred alignments. 
- Alignments[i].ABIAlign = abi_align; - Alignments[i].PrefAlign = pref_align; - return; - } - } - - Alignments.push_back(TargetAlignElem::get(align_type, abi_align, - pref_align, bit_width)); -} - -/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or -/// preferred if ABIInfo = false) the target wants for the specified datatype. -unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, - uint32_t BitWidth, bool ABIInfo, - Type *Ty) const { - // Check to see if we have an exact match and remember the best match we see. - int BestMatchIdx = -1; - int LargestInt = -1; - for (unsigned i = 0, e = Alignments.size(); i != e; ++i) { - if (Alignments[i].AlignType == AlignType && - Alignments[i].TypeBitWidth == BitWidth) - return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign; - - // The best match so far depends on what we're looking for. - if (AlignType == INTEGER_ALIGN && - Alignments[i].AlignType == INTEGER_ALIGN) { - // The "best match" for integers is the smallest size that is larger than - // the BitWidth requested. - if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 || - Alignments[i].TypeBitWidth < Alignments[BestMatchIdx].TypeBitWidth)) - BestMatchIdx = i; - // However, if there isn't one that's larger, then we must use the - // largest one we have (see below) - if (LargestInt == -1 || - Alignments[i].TypeBitWidth > Alignments[LargestInt].TypeBitWidth) - LargestInt = i; - } - } - - // Okay, we didn't find an exact solution. Fall back here depending on what - // is being looked for. - if (BestMatchIdx == -1) { - // If we didn't find an integer alignment, fall back on most conservative. - if (AlignType == INTEGER_ALIGN) { - BestMatchIdx = LargestInt; - } else { - assert(AlignType == VECTOR_ALIGN && "Unknown alignment type!"); - - // By default, use natural alignment for vector types. This is consistent - // with what clang and llvm-gcc do. - unsigned Align = getTypeAllocSize(cast<VectorType>(Ty)->getElementType()); - Align *= cast<VectorType>(Ty)->getNumElements(); - // If the alignment is not a power of 2, round up to the next power of 2. - // This happens for non-power-of-2 length vectors. - if (Align & (Align-1)) - Align = NextPowerOf2(Align); - return Align; - } - } - - // Since we got a "best match" index, just return it. - return ABIInfo ? Alignments[BestMatchIdx].ABIAlign - : Alignments[BestMatchIdx].PrefAlign; -} - -namespace { - -class StructLayoutMap { - typedef DenseMap<StructType*, StructLayout*> LayoutInfoTy; - LayoutInfoTy LayoutInfo; - -public: - virtual ~StructLayoutMap() { - // Remove any layouts. - for (LayoutInfoTy::iterator I = LayoutInfo.begin(), E = LayoutInfo.end(); - I != E; ++I) { - StructLayout *Value = I->second; - Value->~StructLayout(); - free(Value); - } - } - - StructLayout *&operator[](StructType *STy) { - return LayoutInfo[STy]; - } - - // for debugging... - virtual void dump() const {} -}; - -} // end anonymous namespace - -TargetData::~TargetData() { - delete static_cast<StructLayoutMap*>(LayoutMap); -} - -const StructLayout *TargetData::getStructLayout(StructType *Ty) const { - if (!LayoutMap) - LayoutMap = new StructLayoutMap(); - - StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap); - StructLayout *&SL = (*STM)[Ty]; - if (SL) return SL; - - // Otherwise, create the struct layout. Because it is variable length, we - // malloc it, then use placement new. 
- int NumElts = Ty->getNumElements(); - StructLayout *L = - (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1) * sizeof(uint64_t)); - - // Set SL before calling StructLayout's ctor. The ctor could cause other - // entries to be added to TheMap, invalidating our reference. - SL = L; - - new (L) StructLayout(Ty, *this); - - return L; -} - -std::string TargetData::getStringRepresentation() const { - std::string Result; - raw_string_ostream OS(Result); - - OS << (LittleEndian ? "e" : "E") - << "-p:" << PointerMemSize*8 << ':' << PointerABIAlign*8 - << ':' << PointerPrefAlign*8 - << "-S" << StackNaturalAlign*8; - - for (unsigned i = 0, e = Alignments.size(); i != e; ++i) { - const TargetAlignElem &AI = Alignments[i]; - OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':' - << AI.ABIAlign*8 << ':' << AI.PrefAlign*8; - } - - if (!LegalIntWidths.empty()) { - OS << "-n" << (unsigned)LegalIntWidths[0]; - - for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i) - OS << ':' << (unsigned)LegalIntWidths[i]; - } - return OS.str(); -} - - -uint64_t TargetData::getTypeSizeInBits(Type *Ty) const { - assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); - switch (Ty->getTypeID()) { - case Type::LabelTyID: - case Type::PointerTyID: - return getPointerSizeInBits(); - case Type::ArrayTyID: { - ArrayType *ATy = cast<ArrayType>(Ty); - return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements(); - } - case Type::StructTyID: - // Get the layout annotation... which is lazily created on demand. - return getStructLayout(cast<StructType>(Ty))->getSizeInBits(); - case Type::IntegerTyID: - return cast<IntegerType>(Ty)->getBitWidth(); - case Type::VoidTyID: - return 8; - case Type::HalfTyID: - return 16; - case Type::FloatTyID: - return 32; - case Type::DoubleTyID: - case Type::X86_MMXTyID: - return 64; - case Type::PPC_FP128TyID: - case Type::FP128TyID: - return 128; - // In memory objects this is always aligned to a higher boundary, but - // only 80 bits contain information. - case Type::X86_FP80TyID: - return 80; - case Type::VectorTyID: - return cast<VectorType>(Ty)->getBitWidth(); - default: - llvm_unreachable("TargetData::getTypeSizeInBits(): Unsupported type"); - } -} - -/*! - \param abi_or_pref Flag that determines which alignment is returned. true - returns the ABI alignment, false returns the preferred alignment. - \param Ty The underlying type for which alignment is determined. - - Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref - == false) for the requested type \a Ty. - */ -unsigned TargetData::getAlignment(Type *Ty, bool abi_or_pref) const { - int AlignType = -1; - - assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); - switch (Ty->getTypeID()) { - // Early escape for the non-numeric types. - case Type::LabelTyID: - case Type::PointerTyID: - return (abi_or_pref - ? getPointerABIAlignment() - : getPointerPrefAlignment()); - case Type::ArrayTyID: - return getAlignment(cast<ArrayType>(Ty)->getElementType(), abi_or_pref); - - case Type::StructTyID: { - // Packed structure types always have an ABI alignment of one. - if (cast<StructType>(Ty)->isPacked() && abi_or_pref) - return 1; - - // Get the layout annotation... which is lazily created on demand. 
- const StructLayout *Layout = getStructLayout(cast<StructType>(Ty)); - unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty); - return std::max(Align, Layout->getAlignment()); - } - case Type::IntegerTyID: - case Type::VoidTyID: - AlignType = INTEGER_ALIGN; - break; - case Type::HalfTyID: - case Type::FloatTyID: - case Type::DoubleTyID: - // PPC_FP128TyID and FP128TyID have different data contents, but the - // same size and alignment, so they look the same here. - case Type::PPC_FP128TyID: - case Type::FP128TyID: - case Type::X86_FP80TyID: - AlignType = FLOAT_ALIGN; - break; - case Type::X86_MMXTyID: - case Type::VectorTyID: - AlignType = VECTOR_ALIGN; - break; - default: - llvm_unreachable("Bad type for getAlignment!!!"); - } - - return getAlignmentInfo((AlignTypeEnum)AlignType, getTypeSizeInBits(Ty), - abi_or_pref, Ty); -} - -unsigned TargetData::getABITypeAlignment(Type *Ty) const { - return getAlignment(Ty, true); -} - -/// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for -/// an integer type of the specified bitwidth. -unsigned TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const { - return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0); -} - - -unsigned TargetData::getCallFrameTypeAlignment(Type *Ty) const { - for (unsigned i = 0, e = Alignments.size(); i != e; ++i) - if (Alignments[i].AlignType == STACK_ALIGN) - return Alignments[i].ABIAlign; - - return getABITypeAlignment(Ty); -} - -unsigned TargetData::getPrefTypeAlignment(Type *Ty) const { - return getAlignment(Ty, false); -} - -unsigned TargetData::getPreferredTypeAlignmentShift(Type *Ty) const { - unsigned Align = getPrefTypeAlignment(Ty); - assert(!(Align & (Align-1)) && "Alignment is not a power of two!"); - return Log2_32(Align); -} - -/// getIntPtrType - Return an unsigned integer type that is the same size or -/// greater to the host pointer size. -IntegerType *TargetData::getIntPtrType(LLVMContext &C) const { - return IntegerType::get(C, getPointerSizeInBits()); -} - - -uint64_t TargetData::getIndexedOffset(Type *ptrTy, - ArrayRef<Value *> Indices) const { - Type *Ty = ptrTy; - assert(Ty->isPointerTy() && "Illegal argument for getIndexedOffset()"); - uint64_t Result = 0; - - generic_gep_type_iterator<Value* const*> - TI = gep_type_begin(ptrTy, Indices); - for (unsigned CurIDX = 0, EndIDX = Indices.size(); CurIDX != EndIDX; - ++CurIDX, ++TI) { - if (StructType *STy = dyn_cast<StructType>(*TI)) { - assert(Indices[CurIDX]->getType() == - Type::getInt32Ty(ptrTy->getContext()) && - "Illegal struct idx"); - unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue(); - - // Get structure layout information... - const StructLayout *Layout = getStructLayout(STy); - - // Add in the offset, as calculated by the structure layout info... - Result += Layout->getElementOffset(FieldNo); - - // Update Ty to refer to current element - Ty = STy->getElementType(FieldNo); - } else { - // Update Ty to refer to current element - Ty = cast<SequentialType>(Ty)->getElementType(); - - // Get the array index and the size of each array element. - if (int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue()) - Result += (uint64_t)arrayIdx * getTypeAllocSize(Ty); - } - } - - return Result; -} - -/// getPreferredAlignment - Return the preferred alignment of the specified -/// global. This includes an explicitly requested alignment (if the global -/// has one). 
-unsigned TargetData::getPreferredAlignment(const GlobalVariable *GV) const {
- Type *ElemType = GV->getType()->getElementType();
- unsigned Alignment = getPrefTypeAlignment(ElemType);
- unsigned GVAlignment = GV->getAlignment();
- if (GVAlignment >= Alignment) {
- Alignment = GVAlignment;
- } else if (GVAlignment != 0) {
- Alignment = std::max(GVAlignment, getABITypeAlignment(ElemType));
- }
-
- if (GV->hasInitializer() && GVAlignment == 0) {
- if (Alignment < 16) {
- // If the global is not external, see if it is large. If so, give it a
- // larger alignment.
- if (getTypeSizeInBits(ElemType) > 128)
- Alignment = 16; // 16-byte alignment.
- }
- }
- return Alignment;
-}
-
-/// getPreferredAlignmentLog - Return the preferred alignment of the
-/// specified global, returned in log form. This includes an explicitly
-/// requested alignment (if the global has one).
-unsigned TargetData::getPreferredAlignmentLog(const GlobalVariable *GV) const {
- return Log2_32(getPreferredAlignment(GV));
-}
diff --git a/contrib/llvm/lib/Target/TargetELFWriterInfo.cpp b/contrib/llvm/lib/Target/TargetELFWriterInfo.cpp
deleted file mode 100644
index a661ee9..0000000
--- a/contrib/llvm/lib/Target/TargetELFWriterInfo.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-//===-- lib/Target/TargetELFWriterInfo.cpp - ELF Writer Info ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the TargetELFWriterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Function.h"
-#include "llvm/Target/TargetELFWriterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
-using namespace llvm;
-
-TargetELFWriterInfo::TargetELFWriterInfo(bool is64Bit_, bool isLittleEndian_) :
- is64Bit(is64Bit_), isLittleEndian(isLittleEndian_) {
-}
-
-TargetELFWriterInfo::~TargetELFWriterInfo() {}
-
diff --git a/contrib/llvm/lib/Target/TargetLibraryInfo.cpp b/contrib/llvm/lib/Target/TargetLibraryInfo.cpp
index 8e215a7..6d4eab1 100644
--- a/contrib/llvm/lib/Target/TargetLibraryInfo.cpp
+++ b/contrib/llvm/lib/Target/TargetLibraryInfo.cpp
@@ -24,6 +24,16 @@ void TargetLibraryInfo::anchor() { }
 const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
 {
+ "_ZdaPv",
+ "_ZdlPv",
+ "_Znaj",
+ "_ZnajRKSt9nothrow_t",
+ "_Znam",
+ "_ZnamRKSt9nothrow_t",
+ "_Znwj",
+ "_ZnwjRKSt9nothrow_t",
+ "_Znwm",
+ "_ZnwmRKSt9nothrow_t",
 "__cxa_atexit",
 "__cxa_guard_abort",
 "__cxa_guard_acquire",
@@ -31,16 +41,29 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
 "__memcpy_chk",
 "acos",
 "acosf",
+ "acosh",
+ "acoshf",
+ "acoshl",
 "acosl",
 "asin",
 "asinf",
+ "asinh",
+ "asinhf",
+ "asinhl",
 "asinl",
 "atan",
 "atan2",
 "atan2f",
 "atan2l",
 "atanf",
+ "atanh",
+ "atanhf",
+ "atanhl",
 "atanl",
+ "calloc",
+ "cbrt",
+ "cbrtf",
+ "cbrtl",
 "ceil",
 "ceilf",
 "ceill",
@@ -54,6 +77,9 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
 "coshl",
 "cosl",
 "exp",
+ "exp10",
+ "exp10f",
+ "exp10l",
 "exp2",
 "exp2f",
 "exp2l",
@@ -74,6 +100,7 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
 "fmodl",
 "fputc",
 "fputs",
+ "free",
 "fwrite",
 "iprintf",
 "log",
@@ -86,8 +113,12 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
 "log2",
 "log2f",
 "log2l",
+ "logb",
+ "logbf",
+ "logbl",
 "logf",
"logl", + "malloc", "memchr", "memcmp", "memcpy", @@ -97,11 +128,14 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "nearbyint", "nearbyintf", "nearbyintl", + "posix_memalign", "pow", "powf", "powl", "putchar", "puts", + "realloc", + "reallocf", "rint", "rintf", "rintl", @@ -118,14 +152,30 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "sqrt", "sqrtf", "sqrtl", + "stpcpy", "strcat", "strchr", + "strcmp", "strcpy", + "strcspn", + "strdup", "strlen", "strncat", "strncmp", "strncpy", + "strndup", "strnlen", + "strpbrk", + "strrchr", + "strspn", + "strstr", + "strtod", + "strtof", + "strtol", + "strtold", + "strtoll", + "strtoul", + "strtoull", "tan", "tanf", "tanh", @@ -134,7 +184,8 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "tanl", "trunc", "truncf", - "truncl" + "truncl", + "valloc" }; /// initialize - Initialize the set of available library functions based on the @@ -205,6 +256,21 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T, TLI.setUnavailable(LibFunc::tanhl); // Win32 only has C89 math + TLI.setUnavailable(LibFunc::acosh); + TLI.setUnavailable(LibFunc::acoshf); + TLI.setUnavailable(LibFunc::acoshl); + TLI.setUnavailable(LibFunc::asinh); + TLI.setUnavailable(LibFunc::asinhf); + TLI.setUnavailable(LibFunc::asinhl); + TLI.setUnavailable(LibFunc::atanh); + TLI.setUnavailable(LibFunc::atanhf); + TLI.setUnavailable(LibFunc::atanhl); + TLI.setUnavailable(LibFunc::cbrt); + TLI.setUnavailable(LibFunc::cbrtf); + TLI.setUnavailable(LibFunc::cbrtl); + TLI.setUnavailable(LibFunc::exp10); + TLI.setUnavailable(LibFunc::exp10f); + TLI.setUnavailable(LibFunc::exp10l); TLI.setUnavailable(LibFunc::exp2); TLI.setUnavailable(LibFunc::exp2f); TLI.setUnavailable(LibFunc::exp2l); @@ -217,6 +283,9 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T, TLI.setUnavailable(LibFunc::log1p); TLI.setUnavailable(LibFunc::log1pf); TLI.setUnavailable(LibFunc::log1pl); + TLI.setUnavailable(LibFunc::logb); + TLI.setUnavailable(LibFunc::logbf); + TLI.setUnavailable(LibFunc::logbl); TLI.setUnavailable(LibFunc::nearbyint); TLI.setUnavailable(LibFunc::nearbyintf); TLI.setUnavailable(LibFunc::nearbyintl); @@ -254,6 +323,10 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T, TLI.setUnavailable(LibFunc::tanf); TLI.setUnavailable(LibFunc::tanhf); } + + // Win32 does *not* provide stpcpy. It is provided on POSIX systems: + // http://pubs.opengroup.org/onlinepubs/9699919799/functions/stpcpy.html + TLI.setUnavailable(LibFunc::stpcpy); } } diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp index b74a0bd..9d7e2b8 100644 --- a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp +++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp @@ -22,7 +22,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Dwarf.h" @@ -184,7 +184,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, // Otherwise, just drop it into a mergable constant section. If we have // a section for this size, use it, otherwise use the arbitrary sized // mergable section. 
- switch (TM.getTargetData()->getTypeAllocSize(C->getType())) { + switch (TM.getDataLayout()->getTypeAllocSize(C->getType())) { case 4: return SectionKind::getMergeableConst4(); case 8: return SectionKind::getMergeableConst8(); case 16: return SectionKind::getMergeableConst16(); diff --git a/contrib/llvm/lib/Target/TargetMachineC.cpp b/contrib/llvm/lib/Target/TargetMachineC.cpp index d6bba8b..f69c2ab 100644 --- a/contrib/llvm/lib/Target/TargetMachineC.cpp +++ b/contrib/llvm/lib/Target/TargetMachineC.cpp @@ -14,7 +14,7 @@ #include "llvm-c/Core.h" #include "llvm-c/Target.h" #include "llvm-c/TargetMachine.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -146,7 +146,7 @@ char* LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T) { } LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) { - return wrap(unwrap(T)->getTargetData()); + return wrap(unwrap(T)->getDataLayout()); } LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, @@ -158,14 +158,14 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, std::string error; - const TargetData* td = TM->getTargetData(); + const DataLayout* td = TM->getDataLayout(); if (!td) { - error = "No TargetData in TargetMachine"; + error = "No DataLayout in TargetMachine"; *ErrorMessage = strdup(error.c_str()); return true; } - pass.add(new TargetData(*td)); + pass.add(new DataLayout(*td)); TargetMachine::CodeGenFileType ft; switch (codegen) { @@ -184,7 +184,7 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, } if (TM->addPassesToEmitFile(pass, destf, ft)) { - error = "No TargetData in TargetMachine"; + error = "No DataLayout in TargetMachine"; *ErrorMessage = strdup(error.c_str()); return true; } diff --git a/contrib/llvm/lib/Target/TargetRegisterInfo.cpp b/contrib/llvm/lib/Target/TargetRegisterInfo.cpp index 2395f2b..be8b582 100644 --- a/contrib/llvm/lib/Target/TargetRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/TargetRegisterInfo.cpp @@ -20,8 +20,10 @@ using namespace llvm; TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, regclass_iterator RCB, regclass_iterator RCE, - const char *const *subregindexnames) - : InfoDesc(ID), SubRegIndexNames(subregindexnames), + const char *const *SRINames, + const unsigned *SRILaneMasks) + : InfoDesc(ID), SubRegIndexNames(SRINames), + SubRegIndexLaneMasks(SRILaneMasks), RegClassBegin(RCB), RegClassEnd(RCE) { } diff --git a/contrib/llvm/lib/Target/TargetTransformImpl.cpp b/contrib/llvm/lib/Target/TargetTransformImpl.cpp new file mode 100644 index 0000000..b36e6f8 --- /dev/null +++ b/contrib/llvm/lib/Target/TargetTransformImpl.cpp @@ -0,0 +1,353 @@ +// llvm/Target/TargetTransformImpl.cpp - Target Loop Trans Info ---*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetTransformImpl.h" +#include "llvm/Target/TargetLowering.h" +#include <utility> + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// +// Calls used by scalar transformations. 
+//
+//===----------------------------------------------------------------------===//
+
+bool ScalarTargetTransformImpl::isLegalAddImmediate(int64_t imm) const {
+ return TLI->isLegalAddImmediate(imm);
+}
+
+bool ScalarTargetTransformImpl::isLegalICmpImmediate(int64_t imm) const {
+ return TLI->isLegalICmpImmediate(imm);
+}
+
+bool ScalarTargetTransformImpl::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ return TLI->isLegalAddressingMode(AM, Ty);
+}
+
+bool ScalarTargetTransformImpl::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ return TLI->isTruncateFree(Ty1, Ty2);
+}
+
+bool ScalarTargetTransformImpl::isTypeLegal(Type *Ty) const {
+ EVT T = TLI->getValueType(Ty);
+ return TLI->isTypeLegal(T);
+}
+
+unsigned ScalarTargetTransformImpl::getJumpBufAlignment() const {
+ return TLI->getJumpBufAlignment();
+}
+
+unsigned ScalarTargetTransformImpl::getJumpBufSize() const {
+ return TLI->getJumpBufSize();
+}
+
+bool ScalarTargetTransformImpl::shouldBuildLookupTables() const {
+ return TLI->supportJumpTables() &&
+ (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+ TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Calls used by the vectorizers.
+//
+//===----------------------------------------------------------------------===//
+int VectorTargetTransformImpl::InstructionOpcodeToISD(unsigned Opcode) const {
+ enum InstructionOpcodes {
+#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
+#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
+#include "llvm/Instruction.def"
+ };
+ switch (static_cast<InstructionOpcodes>(Opcode)) {
+ case Ret: return 0;
+ case Br: return 0;
+ case Switch: return 0;
+ case IndirectBr: return 0;
+ case Invoke: return 0;
+ case Resume: return 0;
+ case Unreachable: return 0;
+ case Add: return ISD::ADD;
+ case FAdd: return ISD::FADD;
+ case Sub: return ISD::SUB;
+ case FSub: return ISD::FSUB;
+ case Mul: return ISD::MUL;
+ case FMul: return ISD::FMUL;
+ case UDiv: return ISD::UDIV;
+ case SDiv: return ISD::SDIV;
+ case FDiv: return ISD::FDIV;
+ case URem: return ISD::UREM;
+ case SRem: return ISD::SREM;
+ case FRem: return ISD::FREM;
+ case Shl: return ISD::SHL;
+ case LShr: return ISD::SRL;
+ case AShr: return ISD::SRA;
+ case And: return ISD::AND;
+ case Or: return ISD::OR;
+ case Xor: return ISD::XOR;
+ case Alloca: return 0;
+ case Load: return ISD::LOAD;
+ case Store: return ISD::STORE;
+ case GetElementPtr: return 0;
+ case Fence: return 0;
+ case AtomicCmpXchg: return 0;
+ case AtomicRMW: return 0;
+ case Trunc: return ISD::TRUNCATE;
+ case ZExt: return ISD::ZERO_EXTEND;
+ case SExt: return ISD::SIGN_EXTEND;
+ case FPToUI: return ISD::FP_TO_UINT;
+ case FPToSI: return ISD::FP_TO_SINT;
+ case UIToFP: return ISD::UINT_TO_FP;
+ case SIToFP: return ISD::SINT_TO_FP;
+ case FPTrunc: return ISD::FP_ROUND;
+ case FPExt: return ISD::FP_EXTEND;
+ case PtrToInt: return ISD::BITCAST;
+ case IntToPtr: return ISD::BITCAST;
+ case BitCast: return ISD::BITCAST;
+ case ICmp: return ISD::SETCC;
+ case FCmp: return ISD::SETCC;
+ case PHI: return 0;
+ case Call: return 0;
+ case Select: return ISD::SELECT;
+ case UserOp1: return 0;
+ case UserOp2: return 0;
+ case VAArg: return 0;
+ case ExtractElement: return ISD::EXTRACT_VECTOR_ELT;
+ case InsertElement: return ISD::INSERT_VECTOR_ELT;
+ case ShuffleVector: return ISD::VECTOR_SHUFFLE;
+ case ExtractValue: return ISD::MERGE_VALUES;
+ case InsertValue: return ISD::MERGE_VALUES;
+ case LandingPad:
+ return 0;
+ }
+
+ llvm_unreachable("Unknown instruction type encountered!");
+}
+
+std::pair<unsigned, MVT>
+VectorTargetTransformImpl::getTypeLegalizationCost(Type *Ty) const {
+
+ LLVMContext &C = Ty->getContext();
+ EVT MTy = TLI->getValueType(Ty);
+
+ unsigned Cost = 1;
+ // We keep legalizing the type until we find a legal kind. We assume that
+ // the only operation that costs anything is the split. After splitting
+ // we need to handle two types.
+ while (true) {
+ TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, MTy);
+
+ if (LK.first == TargetLowering::TypeLegal)
+ return std::make_pair(Cost, MTy.getSimpleVT());
+
+ if (LK.first == TargetLowering::TypeSplitVector ||
+ LK.first == TargetLowering::TypeExpandInteger)
+ Cost *= 2;
+
+ // Keep legalizing the type.
+ MTy = LK.second;
+ }
+}
+
+unsigned
+VectorTargetTransformImpl::getScalarizationOverhead(Type *Ty,
+ bool Insert,
+ bool Extract) const {
+ assert (Ty->isVectorTy() && "Can only scalarize vectors");
+ unsigned Cost = 0;
+
+ for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+ if (Insert)
+ Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i);
+ if (Extract)
+ Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+ }
+
+ return Cost;
+}
+
+unsigned VectorTargetTransformImpl::getArithmeticInstrCost(unsigned Opcode,
+ Type *Ty) const {
+ // Check if any of the operands are vector operands.
+ int ISD = InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Ty);
+
+ if (!TLI->isOperationExpand(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1. Multiply
+ // by the type-legalization overhead.
+ return LT.first * 1;
+ }
+
+ // Else, assume that we need to scalarize this op.
+ if (Ty->isVectorTy()) {
+ unsigned Num = Ty->getVectorNumElements();
+ unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
+ // Return the cost of multiple scalar invocations plus the cost of
+ // inserting and extracting the values.
+ return getScalarizationOverhead(Ty, true, true) + Num * Cost;
+ }
+
+ // We don't know anything about this scalar instruction.
+ return 1;
+}
+
+unsigned VectorTargetTransformImpl::getBroadcastCost(Type *Tp) const {
+ return 1;
+}
+
+unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const {
+ int ISD = InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ std::pair<unsigned, MVT> SrcLT = getTypeLegalizationCost(Src);
+ std::pair<unsigned, MVT> DstLT = getTypeLegalizationCost(Dst);
+
+ // Handle scalar conversions.
+ if (!Src->isVectorTy() && !Dst->isVectorTy()) {
+
+ // Scalar bitcasts are usually free.
+ if (Opcode == Instruction::BitCast)
+ return 0;
+
+ if (Opcode == Instruction::Trunc &&
+ TLI->isTruncateFree(SrcLT.second, DstLT.second))
+ return 0;
+
+ if (Opcode == Instruction::ZExt &&
+ TLI->isZExtFree(SrcLT.second, DstLT.second))
+ return 0;
+
+ // Just check the op cost. If the operation is legal then assume it costs 1.
+ if (!TLI->isOperationExpand(ISD, DstLT.second))
+ return 1;
+
+ // Assume that illegal scalar instructions are expensive.
+ return 4;
+ }
+
+ // Check vector-to-vector casts.
+ if (Dst->isVectorTy() && Src->isVectorTy()) {
+
+ // If the cast is between same-sized registers, then the check is simple.
+ if (SrcLT.first == DstLT.first &&
+ SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
+
+ // Bitcasts between types that are legalized to the same type are free.
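 // (Illustration: <4 x i32> and <4 x float> both legalize to one 128-bit
 // register on a typical SIMD target, so that bitcast is modeled as free.)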
+ if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
+ return 0;
+
+ // Assume that Zext is done using AND.
+ if (Opcode == Instruction::ZExt)
+ return 1;
+
+ // Assume that sext is done using SHL and SRA.
+ if (Opcode == Instruction::SExt)
+ return 2;
+
+ // Just check the op cost. If the operation is legal then assume it costs
+ // 1 and multiply by the type-legalization overhead.
+ if (!TLI->isOperationExpand(ISD, DstLT.second))
+ return SrcLT.first * 1;
+ }
+
+ // If we are converting vectors and the operation is illegal, or
+ // if the vectors are legalized to different types, estimate the
+ // scalarization costs.
+ unsigned Num = Dst->getVectorNumElements();
+ unsigned Cost = getCastInstrCost(Opcode, Dst->getScalarType(),
+ Src->getScalarType());
+
+ // Return the cost of multiple scalar invocations plus the cost of
+ // inserting and extracting the values.
+ return getScalarizationOverhead(Dst, true, true) + Num * Cost;
+ }
+
+ // We already handled vector-to-vector and scalar-to-scalar conversions. This
+ // is where we handle bitcasts between vectors and scalars. We need to assume
+ // that the conversion is scalarized in one way or another.
+ if (Opcode == Instruction::BitCast)
+ // Illegal bitcasts are done by storing and loading from a stack slot.
+ return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) +
+ (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0);
+
+ llvm_unreachable("Unhandled cast");
+}
+
+unsigned VectorTargetTransformImpl::getCFInstrCost(unsigned Opcode) const {
+ return 1;
+}
+
+unsigned VectorTargetTransformImpl::getCmpSelInstrCost(unsigned Opcode,
+ Type *ValTy,
+ Type *CondTy) const {
+ int ISD = InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ // Selects on vectors are actually vector selects.
+ if (ISD == ISD::SELECT) {
+ assert(CondTy && "CondTy must exist");
+ if (CondTy->isVectorTy())
+ ISD = ISD::VSELECT;
+ }
+
+ std::pair<unsigned, MVT> LT = getTypeLegalizationCost(ValTy);
+
+ if (!TLI->isOperationExpand(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1. Multiply
+ // by the type-legalization overhead.
+ return LT.first * 1;
+ }
+
+ // Otherwise, assume that the operation is scalarized.
+ if (ValTy->isVectorTy()) {
+ unsigned Num = ValTy->getVectorNumElements();
+ if (CondTy)
+ CondTy = CondTy->getScalarType();
+ unsigned Cost = getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
+ CondTy);
+
+ // Return the cost of multiple scalar invocations plus the cost of
+ // inserting and extracting the values.
+ return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
+ }
+
+ // Unknown scalar opcode.
+ return 1;
+}
+
+unsigned VectorTargetTransformImpl::getVectorInstrCost(unsigned Opcode,
+ Type *Val,
+ unsigned Index) const {
+ return 1;
+}
+
+unsigned
+VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1,
+ Type *Ty2) const {
+ return 1;
+}
+
+unsigned
+VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Src);
+
+ // Assume that all loads of legal types cost 1.
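 // (For example, assuming 128-bit vector registers: a load of <8 x i32> is
 // legalized into two v4i32 loads, so LT.first == 2 and the cost returned
 // below is 2.)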
+ return LT.first; +} + +unsigned +VectorTargetTransformImpl::getNumberOfParts(Type *Tp) const { + std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Tp); + return LT.first; +} diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp index 2794e60..66ad353 100644 --- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp +++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp @@ -18,19 +18,19 @@ using namespace llvm; namespace { - + class X86AsmLexer : public MCTargetAsmLexer { const MCAsmInfo &AsmInfo; - + bool tentativeIsValid; AsmToken tentativeToken; - + const AsmToken &lexTentative() { tentativeToken = getLexer()->Lex(); tentativeIsValid = true; return tentativeToken; } - + const AsmToken &lexDefinite() { if (tentativeIsValid) { tentativeIsValid = false; @@ -38,7 +38,7 @@ class X86AsmLexer : public MCTargetAsmLexer { } return getLexer()->Lex(); } - + AsmToken LexTokenATT(); AsmToken LexTokenIntel(); protected: @@ -47,7 +47,7 @@ protected: SetError(SMLoc(), "No MCAsmLexer installed"); return AsmToken(AsmToken::Error, "", 0); } - + switch (AsmInfo.getAssemblerDialect()) { default: SetError(SMLoc(), "Unhandled dialect"); @@ -71,33 +71,32 @@ public: AsmToken X86AsmLexer::LexTokenATT() { AsmToken lexedToken = lexDefinite(); - + switch (lexedToken.getKind()) { default: return lexedToken; case AsmToken::Error: SetError(Lexer->getErrLoc(), Lexer->getErr()); return lexedToken; - + case AsmToken::Percent: { const AsmToken &nextToken = lexTentative(); if (nextToken.getKind() != AsmToken::Identifier) return lexedToken; - if (unsigned regID = MatchRegisterName(nextToken.getString())) { lexDefinite(); - + // FIXME: This is completely wrong when there is a space or other // punctuation between the % and the register name. StringRef regStr(lexedToken.getString().data(), - lexedToken.getString().size() + + lexedToken.getString().size() + nextToken.getString().size()); - - return AsmToken(AsmToken::Register, regStr, + + return AsmToken(AsmToken::Register, regStr, static_cast<int64_t>(regID)); } - + // Match register name failed. If this is "db[0-7]", match it as an alias // for dr[0-7]. if (nextToken.getString().size() == 3 && @@ -113,29 +112,29 @@ AsmToken X86AsmLexer::LexTokenATT() { case '6': RegNo = X86::DR6; break; case '7': RegNo = X86::DR7; break; } - + if (RegNo != -1) { lexDefinite(); // FIXME: This is completely wrong when there is a space or other // punctuation between the % and the register name. 
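 // (Normal flow, for illustration: "%db6" arrives as Percent plus
 // Identifier("db6") and is re-merged below into a single Register token
 // carrying X86::DR6.)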
StringRef regStr(lexedToken.getString().data(), - lexedToken.getString().size() + + lexedToken.getString().size() + nextToken.getString().size()); - return AsmToken(AsmToken::Register, regStr, + return AsmToken(AsmToken::Register, regStr, static_cast<int64_t>(RegNo)); } } - - + + return lexedToken; - } + } } } AsmToken X86AsmLexer::LexTokenIntel() { const AsmToken &lexedToken = lexDefinite(); - + switch(lexedToken.getKind()) { default: return lexedToken; @@ -144,7 +143,7 @@ AsmToken X86AsmLexer::LexTokenIntel() { return lexedToken; case AsmToken::Identifier: { unsigned regID = MatchRegisterName(lexedToken.getString().lower()); - + if (regID) return AsmToken(AsmToken::Register, lexedToken.getString(), diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index fbbaa9500..ce446e7 100644 --- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -11,12 +11,14 @@ #include "llvm/MC/MCTargetAsmParser.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" @@ -33,13 +35,16 @@ struct X86Operand; class X86AsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; + ParseInstructionInfo *InstInfo; private: MCAsmParser &getParser() const { return Parser; } MCAsmLexer &getLexer() const { return Parser.getLexer(); } bool Error(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(), + bool MatchingInlineAsm = false) { + if (MatchingInlineAsm) return true; return Parser.Error(L, Msg, Ranges); } @@ -51,23 +56,25 @@ private: X86Operand *ParseOperand(); X86Operand *ParseATTOperand(); X86Operand *ParseIntelOperand(); - X86Operand *ParseIntelMemOperand(); + X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc); + X86Operand *ParseIntelTypeOperator(SMLoc StartLoc); + X86Operand *ParseIntelMemOperand(unsigned SegReg, SMLoc StartLoc); X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size); X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); + bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp, + SmallString<64> &Err); + bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); bool processInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Ops); - bool MatchAndEmitInstruction(SMLoc IDLoc, + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out); - - bool MatchInstruction(SMLoc IDLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - SmallVectorImpl<MCInst> &MCInsts); + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm); /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode. @@ -96,14 +103,15 @@ private: public: X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) - : MCTargetAsmParser(), STI(sti), Parser(parser) { + : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) { // Initialize the set of available features. 
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); - virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, + virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); virtual bool ParseDirective(AsmToken DirectiveID); @@ -159,6 +167,7 @@ struct X86Operand : public MCParsedAsmOperand { } Kind; SMLoc StartLoc, EndLoc; + SMLoc OffsetOfLoc; union { struct { @@ -172,6 +181,7 @@ struct X86Operand : public MCParsedAsmOperand { struct { const MCExpr *Val; + bool NeedAsmRewrite; } Imm; struct { @@ -181,6 +191,7 @@ struct X86Operand : public MCParsedAsmOperand { unsigned IndexReg; unsigned Scale; unsigned Size; + bool NeedSizeDir; } Mem; }; @@ -191,8 +202,11 @@ struct X86Operand : public MCParsedAsmOperand { SMLoc getStartLoc() const { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const { return EndLoc; } - + /// getLocRange - Get the range between the first and last token of this + /// operand. SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } + /// getOffsetOfLoc - Get the location of the offset operator. + SMLoc getOffsetOfLoc() const { return OffsetOfLoc; } virtual void print(raw_ostream &OS) const {} @@ -216,6 +230,11 @@ struct X86Operand : public MCParsedAsmOperand { return Imm.Val; } + bool needAsmRewrite() const { + assert(Kind == Immediate && "Invalid access!"); + return Imm.NeedAsmRewrite; + } + const MCExpr *getMemDisp() const { assert(Kind == Memory && "Invalid access!"); return Mem.Disp; @@ -312,6 +331,20 @@ struct X86Operand : public MCParsedAsmOperand { return isImmSExti64i32Value(CE->getValue()); } + unsigned getMemSize() const { + assert(Kind == Memory && "Invalid access!"); + return Mem.Size; + } + + bool isOffsetOf() const { + return OffsetOfLoc.getPointer(); + } + + bool needSizeDirective() const { + assert(Kind == Memory && "Invalid access!"); + return Mem.NeedSizeDir; + } + bool isMem() const { return Kind == Memory; } bool isMem8() const { return Kind == Memory && (!Mem.Size || Mem.Size == 8); @@ -437,21 +470,25 @@ struct X86Operand : public MCParsedAsmOperand { return Res; } - static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc) { + static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc, + SMLoc OffsetOfLoc = SMLoc()) { X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc); Res->Reg.RegNo = RegNo; + Res->OffsetOfLoc = OffsetOfLoc; return Res; } - static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){ + static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc, + bool NeedRewrite = true){ X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc); Res->Imm.Val = Val; + Res->Imm.NeedAsmRewrite = NeedRewrite; return Res; } /// Create an absolute memory operand. 
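  /// (e.g. Intel-syntax "mov eax, [42]" yields Disp == 42 with no segment,
  /// base, or index register; illustration, not part of this change.)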
- static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, - SMLoc EndLoc, unsigned Size = 0) { + static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, + unsigned Size = 0, bool NeedSizeDir = false){ X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; @@ -459,6 +496,7 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = 0; Res->Mem.Scale = 1; Res->Mem.Size = Size; + Res->Mem.NeedSizeDir = NeedSizeDir; return Res; } @@ -466,7 +504,7 @@ struct X86Operand : public MCParsedAsmOperand { static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0) { + unsigned Size = 0, bool NeedSizeDir = false) { // We should never just have a displacement, that should be parsed as an // absolute memory operand. assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); @@ -481,6 +519,7 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = IndexReg; Res->Mem.Scale = Scale; Res->Mem.Size = Size; + Res->Mem.NeedSizeDir = NeedSizeDir; return Res; } }; @@ -510,12 +549,13 @@ bool X86AsmParser::isDstOp(X86Operand &Op) { bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { RegNo = 0; - if (!isParsingIntelSyntax()) { - const AsmToken &TokPercent = Parser.getTok(); - assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!"); - StartLoc = TokPercent.getLoc(); + const AsmToken &PercentTok = Parser.getTok(); + StartLoc = PercentTok.getLoc(); + + // If we encounter a %, ignore it. This code handles registers with and + // without the prefix, unprefixed registers can occur in cfi directives. + if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) Parser.Lex(); // Eat percent token. - } const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) { @@ -621,23 +661,25 @@ X86Operand *X86AsmParser::ParseOperand() { /// getIntelMemOperandSize - Return intel memory operand size. 
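/// (e.g. "dword"/"DWORD" map to 32 and "xmmword"/"XMMWORD" to 128;
/// unrecognized strings yield 0, meaning no explicit size.)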
static unsigned getIntelMemOperandSize(StringRef OpStr) { - unsigned Size = 0; - if (OpStr == "BYTE") Size = 8; - if (OpStr == "WORD") Size = 16; - if (OpStr == "DWORD") Size = 32; - if (OpStr == "QWORD") Size = 64; - if (OpStr == "XWORD") Size = 80; - if (OpStr == "XMMWORD") Size = 128; - if (OpStr == "YMMWORD") Size = 256; + unsigned Size = StringSwitch<unsigned>(OpStr) + .Cases("BYTE", "byte", 8) + .Cases("WORD", "word", 16) + .Cases("DWORD", "dword", 32) + .Cases("QWORD", "qword", 64) + .Cases("XWORD", "xword", 80) + .Cases("XMMWORD", "xmmword", 128) + .Cases("YMMWORD", "ymmword", 256) + .Default(0); return Size; } -X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, +X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, unsigned Size) { unsigned BaseReg = 0, IndexReg = 0, Scale = 1; - SMLoc Start = Parser.getTok().getLoc(), End; + const AsmToken &Tok = Parser.getTok(); + SMLoc Start = Tok.getLoc(), End; - const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); + const MCExpr *Disp = MCConstantExpr::Create(0, getContext()); // Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ] // Eat '[' @@ -653,15 +695,17 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, if (getLexer().isNot(AsmToken::RBrac)) return ErrorOperand(Start, "Expected ']' token!"); Parser.Lex(); + End = Tok.getLoc(); return X86Operand::CreateMem(Disp, Start, End, Size); } } else if (getLexer().is(AsmToken::Integer)) { - int64_t Val = Parser.getTok().getIntVal(); + int64_t Val = Tok.getIntVal(); Parser.Lex(); - SMLoc Loc = Parser.getTok().getLoc(); + SMLoc Loc = Tok.getLoc(); if (getLexer().is(AsmToken::RBrac)) { // Handle '[' number ']' Parser.Lex(); + End = Tok.getLoc(); const MCExpr *Disp = MCConstantExpr::Create(Val, getContext()); if (SegReg) return X86Operand::CreateMem(SegReg, Disp, 0, 0, Scale, @@ -670,7 +714,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, } else if (getLexer().is(AsmToken::Star)) { // Handle '[' Scale*IndexReg ']' Parser.Lex(); - SMLoc IdxRegLoc = Parser.getTok().getLoc(); + SMLoc IdxRegLoc = Tok.getLoc(); if (ParseRegister(IndexReg, IdxRegLoc, End)) return ErrorOperand(IdxRegLoc, "Expected register"); Scale = Val; @@ -678,16 +722,27 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, return ErrorOperand(Loc, "Unexpected token"); } - if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus)) { - bool isPlus = getLexer().is(AsmToken::Plus); + // Parse ][ as a plus. 
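  // (MASM-style juxtaposition, for illustration: "mov eax, [ebx][esi]" is
  // accepted as though it were written "mov eax, [ebx+esi]".)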
+ bool ExpectRBrac = true; + if (getLexer().is(AsmToken::RBrac)) { + ExpectRBrac = false; Parser.Lex(); - SMLoc PlusLoc = Parser.getTok().getLoc(); + End = Tok.getLoc(); + } + + if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus) || + getLexer().is(AsmToken::LBrac)) { + ExpectRBrac = true; + bool isPlus = getLexer().is(AsmToken::Plus) || + getLexer().is(AsmToken::LBrac); + Parser.Lex(); + SMLoc PlusLoc = Tok.getLoc(); if (getLexer().is(AsmToken::Integer)) { - int64_t Val = Parser.getTok().getIntVal(); + int64_t Val = Tok.getIntVal(); Parser.Lex(); if (getLexer().is(AsmToken::Star)) { Parser.Lex(); - SMLoc IdxRegLoc = Parser.getTok().getLoc(); + SMLoc IdxRegLoc = Tok.getLoc(); if (ParseRegister(IndexReg, IdxRegLoc, End)) return ErrorOperand(IdxRegLoc, "Expected register"); Scale = Val; @@ -698,21 +753,48 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, return ErrorOperand(PlusLoc, "unexpected token after +"); } else if (getLexer().is(AsmToken::Identifier)) { // This could be an index register or a displacement expression. - End = Parser.getTok().getLoc(); + End = Tok.getLoc(); if (!IndexReg) ParseRegister(IndexReg, Start, End); else if (getParser().ParseExpression(Disp, End)) return 0; } } + + // Parse ][ as a plus. + if (getLexer().is(AsmToken::RBrac)) { + ExpectRBrac = false; + Parser.Lex(); + End = Tok.getLoc(); + if (getLexer().is(AsmToken::LBrac)) { + ExpectRBrac = true; + Parser.Lex(); + if (getParser().ParseExpression(Disp, End)) + return 0; + } + } else if (ExpectRBrac) { + if (getParser().ParseExpression(Disp, End)) + return 0; + } - if (getLexer().isNot(AsmToken::RBrac)) - if (getParser().ParseExpression(Disp, End)) return 0; + if (ExpectRBrac) { + if (getLexer().isNot(AsmToken::RBrac)) + return ErrorOperand(End, "expected ']' token!"); + Parser.Lex(); + End = Tok.getLoc(); + } - End = Parser.getTok().getLoc(); - if (getLexer().isNot(AsmToken::RBrac)) - return ErrorOperand(End, "expected ']' token!"); - Parser.Lex(); - End = Parser.getTok().getLoc(); + // Parse the dot operator (e.g., [ebx].foo.bar). + if (Tok.getString().startswith(".")) { + SmallString<64> Err; + const MCExpr *NewDisp; + if (ParseIntelDotOperator(Disp, &NewDisp, Err)) + return ErrorOperand(Tok.getLoc(), Err); + + Parser.Lex(); // Eat the field. + Disp = NewDisp; + } + + End = Tok.getLoc(); // handle [-42] if (!BaseReg && !IndexReg) @@ -723,15 +805,15 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, } /// ParseIntelMemOperand - Parse intel style memory operand. 
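/// (e.g. "mov eax, dword ptr [ebp-4]": the size token sets Size = 32 and the
/// bracket expression supplies the base register and displacement.)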
-X86Operand *X86AsmParser::ParseIntelMemOperand() {
+X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
 const AsmToken &Tok = Parser.getTok();
- SMLoc Start = Parser.getTok().getLoc(), End;
- unsigned SegReg = 0;
+ SMLoc End;
 unsigned Size = getIntelMemOperandSize(Tok.getString());
 if (Size) {
 Parser.Lex();
- assert (Tok.getString() == "PTR" && "Unexpected token!");
+ assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") &&
+ "Unexpected token!");
 Parser.Lex();
 }
@@ -750,12 +832,164 @@
 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
 if (getParser().ParseExpression(Disp, End))
 return 0;
- return X86Operand::CreateMem(Disp, Start, End, Size);
+ End = Parser.getTok().getLoc();
+
+ bool NeedSizeDir = false;
+ if (!Size && isParsingInlineAsm()) {
+ if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
+ const MCSymbol &Sym = SymRef->getSymbol();
+ // FIXME: The SemaLookup will fail if the name is anything other than an
+ // identifier.
+ // FIXME: Pass a valid SMLoc.
+ SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size);
+ NeedSizeDir = Size > 0;
+ }
+ }
+ if (!isParsingInlineAsm())
+ return X86Operand::CreateMem(Disp, Start, End, Size);
+ else
+ // When parsing inline assembly we set the base register to a non-zero value
+ // as we don't know the actual value at this time. This is necessary to
+ // get the matching correct in some cases.
+ return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
+ /*Scale*/1, Start, End, Size, NeedSizeDir);
+}
+
+/// Parse the '.' operator.
+bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
+ const MCExpr **NewDisp,
+ SmallString<64> &Err) {
+ AsmToken Tok = Parser.getTok();
+ uint64_t OrigDispVal, DotDispVal;
+
+ // FIXME: Handle non-constant expressions.
+ if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) {
+ OrigDispVal = OrigDisp->getValue();
+ } else {
+ Err = "Non-constant offsets are not supported!";
+ return true;
+ }
+
+ // Drop the '.'.
+ StringRef DotDispStr = Tok.getString().drop_front(1);
+
+ // .Imm gets lexed as a real.
+ if (Tok.is(AsmToken::Real)) {
+ APInt DotDisp;
+ DotDispStr.getAsInteger(10, DotDisp);
+ DotDispVal = DotDisp.getZExtValue();
+ } else if (Tok.is(AsmToken::Identifier)) {
+ // We should only see an identifier when parsing the original inline asm.
+ // The front-end should rewrite this in terms of immediates.
+ assert (isParsingInlineAsm() && "Unexpected field name!");
+
+ unsigned DotDisp;
+ std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
+ if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
+ DotDisp)) {
+ Err = "Unable to look up field reference!";
+ return true;
+ }
+ DotDispVal = DotDisp;
+ } else {
+ Err = "Unexpected token type!";
+ return true;
+ }
+
+ if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
+ SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
+ unsigned Len = DotDispStr.size();
+ unsigned Val = OrigDispVal + DotDispVal;
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
+ Val));
+ }
+
+ *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
+ return false;
+}
+
+/// Parse the 'offset' operator. This operator is used to specify the
+/// location rather than the content of a variable.
+X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
+ SMLoc OffsetOfLoc = Start;
+ Parser.Lex(); // Eat offset.
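// (MSVC semantics, for illustration: "mov eax, offset g_var" loads the
// address of g_var rather than its contents; the operand is rewritten below
// into a plain register with an 'r' constraint.)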
+ Start = Parser.getTok().getLoc();
+ assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier");
+
+ SMLoc End;
+ const MCExpr *Val;
+ if (getParser().ParseExpression(Val, End))
+ return ErrorOperand(Start, "Unable to parse expression!");
+
+ End = Parser.getTok().getLoc();
+
+ // Don't emit the offset operator.
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
+
+ // The offset operator will have an 'r' constraint, thus we need to create
+ // a register operand to ensure proper matching. Just pick a GPR based on
+ // the size of a pointer.
+ unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
+ return X86Operand::CreateReg(RegNo, Start, End, OffsetOfLoc);
+}
+
+/// Parse the 'TYPE' operator. The TYPE operator returns the size of a C or
+/// C++ type or variable. If the variable is an array, TYPE returns the size of
+/// a single element of the array.
+X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) {
+ SMLoc TypeLoc = Start;
+ Parser.Lex(); // Eat the TYPE token.
+ Start = Parser.getTok().getLoc();
+ assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier");
+
+ SMLoc End;
+ const MCExpr *Val;
+ if (getParser().ParseExpression(Val, End))
+ return 0;
+
+ End = Parser.getTok().getLoc();
+
+ unsigned Size = 0;
+ if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) {
+ const MCSymbol &Sym = SymRef->getSymbol();
+ // FIXME: The SemaLookup will fail if the name is anything other than an
+ // identifier.
+ // FIXME: Pass a valid SMLoc.
+ if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size))
+ return ErrorOperand(Start, "Unable to look up TYPE of expr!");
+
+ Size /= 8; // Size is in terms of bits, but we want bytes in this context.
+ }
+
+ // Rewrite the type operator and the C or C++ type or variable in terms of an
+ // immediate. E.g. TYPE foo -> $$4
+ unsigned Len = End.getPointer() - TypeLoc.getPointer();
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, Size));
+
+ const MCExpr *Imm = MCConstantExpr::Create(Size, getContext());
+ return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false);
 }
 
 X86Operand *X86AsmParser::ParseIntelOperand() {
 SMLoc Start = Parser.getTok().getLoc(), End;
 
+ // offset operator.
+ StringRef AsmTokStr = Parser.getTok().getString();
+ if ((AsmTokStr == "offset" || AsmTokStr == "OFFSET") &&
+ isParsingInlineAsm())
+ return ParseIntelOffsetOfOperator(Start);
+
+ // Type directive.
+ if ((AsmTokStr == "type" || AsmTokStr == "TYPE") &&
+ isParsingInlineAsm())
+ return ParseIntelTypeOperator(Start);
+
+ // Unsupported directives.
+ if (isParsingIntelSyntax() &&
+ (AsmTokStr == "size" || AsmTokStr == "SIZE" ||
+ AsmTokStr == "length" || AsmTokStr == "LENGTH"))
+ return ErrorOperand(Start, "Unsupported directive!");
+
 // immediate.
 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
 getLexer().is(AsmToken::Minus)) {
@@ -769,12 +1003,17 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
 
 // register
 unsigned RegNo = 0;
 if (!ParseRegister(RegNo, Start, End)) {
- End = Parser.getTok().getLoc();
- return X86Operand::CreateReg(RegNo, Start, End);
+ // If this is a segment register followed by a ':', then this is the start
+ // of a memory reference, otherwise this is a normal register reference.
+ if (getLexer().isNot(AsmToken::Colon))
+ return X86Operand::CreateReg(RegNo, Start, Parser.getTok().getLoc());
+
+ getParser().Lex(); // Eat the colon.
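 // (e.g. "mov eax, fs:[0]": 'fs' was parsed as a register above; the colon
 // marks it as the segment override for the memory operand that is parsed
 // next.)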
+ return ParseIntelMemOperand(RegNo, Start);
 }
 
 // mem operand
- return ParseIntelMemOperand();
+ return ParseIntelMemOperand(0, Start);
 }
 
 X86Operand *X86AsmParser::ParseATTOperand() {
@@ -972,8 +1211,9 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
 }
 
 bool X86AsmParser::
-ParseInstruction(StringRef Name, SMLoc NameLoc,
+ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ InstInfo = &Info;
 StringRef PatchedName = Name;
 
 // FIXME: Hack to recognize setneb as setne.
@@ -1509,28 +1749,18 @@ processInstruction(MCInst &Inst,
 }
 
 bool X86AsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc,
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out) {
- SmallVector<MCInst, 2> Insts;
- bool Error = MatchInstruction(IDLoc, Operands, Insts);
- if (!Error)
- for (unsigned i = 0, e = Insts.size(); i != e; ++i)
- Out.EmitInstruction(Insts[i]);
- return Error;
-}
-
-bool X86AsmParser::
-MatchInstruction(SMLoc IDLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- SmallVectorImpl<MCInst> &MCInsts) {
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
 assert(!Operands.empty() && "Unexpected empty operand list!");
 X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
 assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+ ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>();
 
 // First, handle aliases that expand to multiple instructions.
 // FIXME: This should be replaced with a real .td file alias mechanism.
- // Also, MatchInstructionImpl should do actually *do* the EmitInstruction
+ // Also, MatchInstructionImpl should actually *do* the EmitInstruction
 // call.
 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
@@ -1539,7 +1769,8 @@ MatchInstruction(SMLoc IDLoc,
 MCInst Inst;
 Inst.setOpcode(X86::WAIT);
 Inst.setLoc(IDLoc);
- MCInsts.push_back(Inst);
+ if (!MatchingInlineAsm)
+ Out.EmitInstruction(Inst);
 
 const char *Repl =
 StringSwitch<const char*>(Op->getToken())
@@ -1558,28 +1789,30 @@ MatchInstruction(SMLoc IDLoc,
 }
 
 bool WasOriginallyInvalidOperand = false;
- unsigned OrigErrorInfo;
 MCInst Inst;
 
 // First, try a direct match.
- switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo,
+ switch (MatchInstructionImpl(Operands, Inst,
+ ErrorInfo, MatchingInlineAsm,
 isParsingIntelSyntax())) {
 default: break;
 case Match_Success:
 // Some instructions need post-processing to, for example, tweak which
 // encoding is selected. Loop on it while changes happen so the
 // individual transformations can chain off each other.
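 // (For instance, rewriting one pseudo form may expose another applicable
 // rewrite, so the loop below reruns processInstruction until it reports
 // no change.)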
- while (processInstruction(Inst, Operands)) - ; + if (!MatchingInlineAsm) + while (processInstruction(Inst, Operands)) + ; Inst.setLoc(IDLoc); - MCInsts.push_back(Inst); + if (!MatchingInlineAsm) + Out.EmitInstruction(Inst); + Opcode = Inst.getOpcode(); return false; case Match_MissingFeature: - Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + Error(IDLoc, "instruction requires a CPU feature not currently enabled", + EmptyRanges, MatchingInlineAsm); return true; - case Match_ConversionFail: - return Error(IDLoc, "unable to convert operands to instruction"); case Match_InvalidOperand: WasOriginallyInvalidOperand = true; break; @@ -1612,13 +1845,17 @@ MatchInstruction(SMLoc IDLoc, unsigned ErrorInfoIgnore; unsigned Match1, Match2, Match3, Match4; - Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); Tmp[Base.size()] = Suffixes[1]; - Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); Tmp[Base.size()] = Suffixes[2]; - Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); Tmp[Base.size()] = Suffixes[3]; - Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); // Restore the old token. Op->setTokenValue(Base); @@ -1631,7 +1868,9 @@ MatchInstruction(SMLoc IDLoc, (Match3 == Match_Success) + (Match4 == Match_Success); if (NumSuccessfulMatches == 1) { Inst.setLoc(IDLoc); - MCInsts.push_back(Inst); + if (!MatchingInlineAsm) + Out.EmitInstruction(Inst); + Opcode = Inst.getOpcode(); return false; } @@ -1658,7 +1897,7 @@ MatchInstruction(SMLoc IDLoc, OS << "'" << Base << MatchChars[i] << "'"; } OS << ")"; - Error(IDLoc, OS.str()); + Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm); return true; } @@ -1669,31 +1908,36 @@ MatchInstruction(SMLoc IDLoc, if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) && (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) { if (!WasOriginallyInvalidOperand) { + ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges : + Op->getLocRange(); return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'", - Op->getLocRange()); + Ranges, MatchingInlineAsm); } // Recover location info for the operand if we know which was the problem. - if (OrigErrorInfo != ~0U) { - if (OrigErrorInfo >= Operands.size()) - return Error(IDLoc, "too few operands for instruction"); + if (ErrorInfo != ~0U) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction", + EmptyRanges, MatchingInlineAsm); - X86Operand *Operand = (X86Operand*)Operands[OrigErrorInfo]; + X86Operand *Operand = (X86Operand*)Operands[ErrorInfo]; if (Operand->getStartLoc().isValid()) { SMRange OperandRange = Operand->getLocRange(); return Error(Operand->getStartLoc(), "invalid operand for instruction", - OperandRange); + OperandRange, MatchingInlineAsm); } } - return Error(IDLoc, "invalid operand for instruction"); + return Error(IDLoc, "invalid operand for instruction", EmptyRanges, + MatchingInlineAsm); } // If one instruction matched with a missing feature, report this as a // missing feature. 
if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) + (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){ - Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + Error(IDLoc, "instruction requires a CPU feature not currently enabled", + EmptyRanges, MatchingInlineAsm); return true; } @@ -1701,12 +1945,14 @@ MatchInstruction(SMLoc IDLoc, // operand failure. if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) + (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){ - Error(IDLoc, "invalid operand for instruction"); + Error(IDLoc, "invalid operand for instruction", EmptyRanges, + MatchingInlineAsm); return true; } // If all of these were an outright failure, report it in a useless way. - Error(IDLoc, "unknown use of instruction mnemonic without a size suffix"); + Error(IDLoc, "unknown use of instruction mnemonic without a size suffix", + EmptyRanges, MatchingInlineAsm); return true; } @@ -1717,7 +1963,10 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { return ParseDirectiveWord(2, DirectiveID.getLoc()); else if (IDVal.startswith(".code")) return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); - else if (IDVal.startswith(".intel_syntax")) { + else if (IDVal.startswith(".att_syntax")) { + getParser().setAssemblerDialect(0); + return false; + } else if (IDVal.startswith(".intel_syntax")) { getParser().setAssemblerDialect(1); if (getLexer().isNot(AsmToken::EndOfStatement)) { if(Parser.getTok().getString() == "noprefix") { diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 5039887..f136927 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -44,7 +44,7 @@ void x86DisassemblerDebug(const char *file, dbgs() << file << ":" << line << ": " << s; } -const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii) { +const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii) { const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii); return MII->getName(Opcode); } @@ -95,8 +95,8 @@ const EDInstInfo *X86GenericDisassembler::getEDInfo() const { /// be a pointer to a MemoryObject. /// @param byte - A pointer to the byte to be read. /// @param address - The address to be read. 
-static int regionReader(void* arg, uint8_t* byte, uint64_t address) { - MemoryObject* region = static_cast<MemoryObject*>(arg); +static int regionReader(const void* arg, uint8_t* byte, uint64_t address) { + const MemoryObject* region = static_cast<const MemoryObject*>(arg); return region->readByte(address, byte); } @@ -135,10 +135,10 @@ X86GenericDisassembler::getInstruction(MCInst &instr, int ret = decodeInstruction(&internalInstr, regionReader, - (void*)®ion, + (const void*)®ion, loggerFn, (void*)&vStream, - (void*)MII, + (const void*)MII, address, fMode); @@ -379,6 +379,8 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, } switch (type) { + case TYPE_XMM32: + case TYPE_XMM64: case TYPE_XMM128: mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); return; diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h index 0dbfa26..981701f 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h @@ -78,7 +78,7 @@ uint16_t operands; #define INSTRUCTION_IDS \ - unsigned instructionIDs; + uint16_t instructionIDs; #include "X86DisassemblerDecoderCommon.h" diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index 0c92912..85d8a99 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -138,6 +138,10 @@ static InstrUID decode(OpcodeType type, if (modFromModRM(modRM) == 0x3) return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; + case MODRM_SPLITMISC: + if (modFromModRM(modRM) == 0x3) + return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8]; + return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; case MODRM_FULL: return modRMTable[dec->instructionIDs+modRM]; } @@ -200,7 +204,7 @@ static void unconsumeByte(struct InternalInstruction* insn) { insn->readerCursor + offset); \ if (ret) \ return ret; \ - combined = combined | ((type)byte << ((type)offset * 8)); \ + combined = combined | ((uint64_t)byte << (offset * 8)); \ } \ *ptr = combined; \ insn->readerCursor += sizeof(type); \ @@ -690,7 +694,7 @@ static int getIDWithAttrMask(uint16_t* instructionID, * @param orig - The instruction that is not 16-bit * @param equiv - The instruction that is 16-bit */ -static BOOL is16BitEquvalent(const char* orig, const char* equiv) { +static BOOL is16BitEquivalent(const char* orig, const char* equiv) { off_t i; for (i = 0;; i++) { @@ -719,7 +723,7 @@ static BOOL is16BitEquvalent(const char* orig, const char* equiv) { * @return - 0 if the ModR/M could be read when needed or was not needed; * nonzero otherwise. 
*/ -static int getID(struct InternalInstruction* insn, void *miiArg) { +static int getID(struct InternalInstruction* insn, const void *miiArg) { uint8_t attrMask; uint16_t instructionID; @@ -856,7 +860,7 @@ static int getID(struct InternalInstruction* insn, void *miiArg) { specWithOpSizeName = x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg); - if (is16BitEquvalent(specName, specWithOpSizeName)) { + if (is16BitEquivalent(specName, specWithOpSizeName)) { insn->instructionID = instructionIDWithOpsize; insn->spec = specifierForUID(instructionIDWithOpsize); } else { @@ -1621,10 +1625,10 @@ static int readOperands(struct InternalInstruction* insn) { */ int decodeInstruction(struct InternalInstruction* insn, byteReader_t reader, - void* readerArg, + const void* readerArg, dlog_t logger, void* loggerArg, - void* miiArg, + const void* miiArg, uint64_t startLoc, DisassemblerMode mode) { memset(insn, 0, sizeof(struct InternalInstruction)); diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 797703f..407ead3 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -24,7 +24,7 @@ extern "C" { uint16_t operands; #define INSTRUCTION_IDS \ - unsigned instructionIDs; + uint16_t instructionIDs; #include "X86DisassemblerDecoderCommon.h" @@ -403,7 +403,7 @@ typedef uint8_t BOOL; * be read from. * @return - -1 if the byte cannot be read for any reason; 0 otherwise. */ -typedef int (*byteReader_t)(void* arg, uint8_t* byte, uint64_t address); +typedef int (*byteReader_t)(const void* arg, uint8_t* byte, uint64_t address); /* * dlog_t - Type for the logging function that the consumer can provide to @@ -422,7 +422,7 @@ struct InternalInstruction { /* Reader interface (C) */ byteReader_t reader; /* Opaque value passed to the reader */ - void* readerArg; + const void* readerArg; /* The address of the next byte to read via the reader */ uint64_t readerCursor; @@ -561,10 +561,10 @@ struct InternalInstruction { */ int decodeInstruction(struct InternalInstruction* insn, byteReader_t reader, - void* readerArg, + const void* readerArg, dlog_t logger, void* loggerArg, - void* miiArg, + const void* miiArg, uint64_t startLoc, DisassemblerMode mode); @@ -579,7 +579,7 @@ void x86DisassemblerDebug(const char *file, unsigned line, const char *s); -const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii); +const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii); #ifdef __cplusplus } diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index b0a0e1e..23dfe4b 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -160,6 +160,10 @@ typedef uint16_t InstrUID; * MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode * corresponds to one instruction; otherwise, it corresponds to * a different instruction. + * MODRM_SPLITMISC- If the ModR/M byte is between 0x00 and 0xbf, ModR/M byte + * divided by 8 is used to select instruction; otherwise, each + * value of the ModR/M byte could correspond to a different + * instruction. * MODRM_SPLITREG - ModR/M byte divided by 8 is used to select instruction. 
This corresponds to instructions that use reg field as opcode * MODRM_FULL - Potentially, each value of the ModR/M byte could correspond @@ -169,6 +173,7 @@ typedef uint16_t InstrUID; #define MODRMTYPES \ ENUM_ENTRY(MODRM_ONEENTRY) \ ENUM_ENTRY(MODRM_SPLITRM) \ + ENUM_ENTRY(MODRM_SPLITMISC) \ ENUM_ENTRY(MODRM_SPLITREG) \ ENUM_ENTRY(MODRM_FULL) diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 5118e4c..a4bd114 100644 --- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "asm-printer" #include "X86ATTInstPrinter.h" #include "X86InstComments.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" @@ -33,11 +34,19 @@ using namespace llvm; void X86ATTInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - OS << '%' << getRegisterName(RegNo); + OS << markup("<reg:") + << '%' << getRegisterName(RegNo) + << markup(">"); } void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) { + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + uint64_t TSFlags = Desc.TSFlags; + + if (TSFlags & X86II::LOCK) + OS << "\tlock\n"; + // Try to print any aliases first. if (!printAliasInstr(MI, OS)) printInstruction(MI, OS); @@ -52,7 +61,8 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O) { - switch (MI->getOperand(Op).getImm()) { + int64_t Imm = MI->getOperand(Op).getImm() & 0xf; + switch (Imm) { default: llvm_unreachable("Invalid ssecc argument!"); case 0: O << "eq"; break; case 1: O << "lt"; break; @@ -70,6 +80,30 @@ void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op, case 0xd: O << "ge"; break; case 0xe: O << "gt"; break; case 0xf: O << "true"; break; + } +} + +void X86ATTInstPrinter::printAVXCC(const MCInst *MI, unsigned Op, + raw_ostream &O) { + int64_t Imm = MI->getOperand(Op).getImm() & 0x1f; + switch (Imm) { + default: llvm_unreachable("Invalid avxcc argument!"); + case 0: O << "eq"; break; + case 1: O << "lt"; break; + case 2: O << "le"; break; + case 3: O << "unord"; break; + case 4: O << "neq"; break; + case 5: O << "nlt"; break; + case 6: O << "nle"; break; + case 7: O << "ord"; break; + case 8: O << "eq_uq"; break; + case 9: O << "nge"; break; + case 0xa: O << "ngt"; break; + case 0xb: O << "false"; break; + case 0xc: O << "neq_oq"; break; + case 0xd: O << "ge"; break; + case 0xe: O << "gt"; break; + case 0xf: O << "true"; break; case 0x10: O << "eq_os"; break; case 0x11: O << "lt_oq"; break; case 0x12: O << "le_oq"; break; @@ -89,12 +123,12 @@ void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op, } } -/// print_pcrel_imm - This is used to print an immediate value that ends up +/// printPCRelImm - This is used to print an immediate value that ends up /// being encoded as a pc-relative value (e.g. for jumps and calls). These /// print slightly differently than normal immediates. For example, a $ is not /// emitted. 
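An aside before the printer changes: a minimal standalone sketch (not the decoder itself) of how the MODRM_* decision types documented above index modRMTable. The new MODRM_SPLITMISC form keys memory forms on the reg field but gives every register form (mod == 0b11) its own slot past the first eight, exactly as in the decode() case added earlier in this change:

    #include <cstdint>

    enum ModRMDecisionType { MODRM_SPLITMISC, MODRM_SPLITREG, MODRM_FULL };

    // Returns the offset added to the decision's instructionIDs table base.
    static unsigned modRMIndex(ModRMDecisionType type, uint8_t modRM) {
      const bool regForm = (modRM >> 6) == 0x3;     // modFromModRM(modRM)
      switch (type) {
      case MODRM_SPLITMISC:                         // new in this change
        return regForm ? (modRM & 0x3f) + 8         // each reg form distinct
                       : (modRM & 0x38) >> 3;       // mem forms: reg field
      case MODRM_SPLITREG:
        return ((modRM & 0x38) >> 3) + (regForm ? 8 : 0);
      case MODRM_FULL:
        return modRM;                               // one entry per byte value
      }
      return 0;
    }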
-void X86ATTInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { +void X86ATTInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isImm()) O << Op.getImm(); @@ -119,17 +153,21 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { - O << '%' << getRegisterName(Op.getReg()); + printRegName(O, Op.getReg()); } else if (Op.isImm()) { // Print X86 immediates as signed values. - O << '$' << (int64_t)Op.getImm(); + O << markup("<imm:") + << '$' << (int64_t)Op.getImm() + << markup(">"); if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256)) *CommentStream << format("imm = 0x%" PRIX64 "\n", (uint64_t)Op.getImm()); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << '$' << *Op.getExpr(); + O << markup("<imm:") + << '$' << *Op.getExpr() + << markup(">"); } } @@ -140,6 +178,8 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, const MCOperand &DispSpec = MI->getOperand(Op+3); const MCOperand &SegReg = MI->getOperand(Op+4); + O << markup("<mem:"); + // If this has a segment register, print it. if (SegReg.getReg()) { printOperand(MI, Op+4, O); @@ -164,9 +204,15 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, O << ','; printOperand(MI, Op+2, O); unsigned ScaleVal = MI->getOperand(Op+1).getImm(); - if (ScaleVal != 1) - O << ',' << ScaleVal; + if (ScaleVal != 1) { + O << ',' + << markup("<imm:") + << ScaleVal + << markup(">"); + } } O << ')'; } + + O << markup(">"); } diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index 2e00bff..8e09183 100644 --- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -40,7 +40,8 @@ public: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS); void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS); void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &OS); - void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &OS); + void printAVXCC(const MCInst *MI, unsigned Op, raw_ostream &OS); + void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &OS); void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printMemReference(MI, OpNo, O); diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 4ea662c..d67aec7 100644 --- a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -1,4 +1,4 @@ -//===-- X86IntelInstPrinter.cpp - AT&T assembly instruction printing ------===// +//===-- X86IntelInstPrinter.cpp - Intel assembly instruction printing -----===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file includes code for rendering MCInst instances as AT&T-style +// This file includes code for rendering MCInst instances as Intel-style // assembly. 
// //===----------------------------------------------------------------------===// @@ -15,6 +15,7 @@ #define DEBUG_TYPE "asm-printer" #include "X86IntelInstPrinter.h" #include "X86InstComments.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCExpr.h" @@ -32,6 +33,12 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) { + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + uint64_t TSFlags = Desc.TSFlags; + + if (TSFlags & X86II::LOCK) + OS << "\tlock\n"; + printInstruction(MI, OS); // Next always print the annotation. @@ -44,7 +51,8 @@ void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O) { - switch (MI->getOperand(Op).getImm()) { + int64_t Imm = MI->getOperand(Op).getImm() & 0xf; + switch (Imm) { default: llvm_unreachable("Invalid ssecc argument!"); case 0: O << "eq"; break; case 1: O << "lt"; break; @@ -62,6 +70,30 @@ void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op, case 0xd: O << "ge"; break; case 0xe: O << "gt"; break; case 0xf: O << "true"; break; + } +} + +void X86IntelInstPrinter::printAVXCC(const MCInst *MI, unsigned Op, + raw_ostream &O) { + int64_t Imm = MI->getOperand(Op).getImm() & 0x1f; + switch (Imm) { + default: llvm_unreachable("Invalid avxcc argument!"); + case 0: O << "eq"; break; + case 1: O << "lt"; break; + case 2: O << "le"; break; + case 3: O << "unord"; break; + case 4: O << "neq"; break; + case 5: O << "nlt"; break; + case 6: O << "nle"; break; + case 7: O << "ord"; break; + case 8: O << "eq_uq"; break; + case 9: O << "nge"; break; + case 0xa: O << "ngt"; break; + case 0xb: O << "false"; break; + case 0xc: O << "neq_oq"; break; + case 0xd: O << "ge"; break; + case 0xe: O << "gt"; break; + case 0xf: O << "true"; break; case 0x10: O << "eq_os"; break; case 0x11: O << "lt_oq"; break; case 0x12: O << "le_oq"; break; @@ -78,14 +110,13 @@ void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op, case 0x1d: O << "ge_oq"; break; case 0x1e: O << "gt_oq"; break; case 0x1f: O << "true_us"; break; - } } -/// print_pcrel_imm - This is used to print an immediate value that ends up +/// printPCRelImm - This is used to print an immediate value that ends up /// being encoded as a pc-relative value. -void X86IntelInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { +void X86IntelInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isImm()) O << Op.getImm(); @@ -153,8 +184,7 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op, printOperand(MI, Op+2, O); NeedPlus = true; } - - + if (!DispSpec.isImm()) { if (NeedPlus) O << " + "; assert(DispSpec.isExpr() && "non-immediate displacement for LEA?"); diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index 4f5938d..bb769eb 100644 --- a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h +++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This class prints an X86 MCInst to intel style .s file syntax. +// This class prints an X86 MCInst to Intel style .s file syntax. 
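Both printInst overrides above now peel a LOCK prefix out of the instruction's TSFlags and print it on its own line before the mnemonic, so a locked RMW prints as "lock" followed by, e.g., "\tcmpxchgl %ecx, (%rdx)". A trimmed sketch of that gate; the bit value is illustrative only, the real one is the target-internal X86II::LOCK flag from X86BaseInfo.h:

    #include <cstdint>
    #include "llvm/Support/raw_ostream.h"

    // Hypothetical stand-in for the X86II::LOCK TSFlags bit tested above.
    static const uint64_t kLockBit = 1ULL << 31;  // illustrative value only

    static void emitLockPrefix(uint64_t tsFlags, llvm::raw_ostream &os) {
      if (tsFlags & kLockBit)
        os << "\tlock\n";  // prefix on its own line, before the mnemonic
      // ... the tablegen'd printInstruction(MI, os) would follow ...
    }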
// //===----------------------------------------------------------------------===// @@ -37,7 +37,8 @@ public: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O); void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O); - void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printAVXCC(const MCInst *MI, unsigned Op, raw_ostream &O); + void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { O << "OPAQUE PTR "; diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index c4b75a6..467edad 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -279,9 +279,9 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { Res.setOpcode(RelaxedOp); } -/// writeNopData - Write optimal nops to the output file for the \arg Count +/// writeNopData - Write optimal nops to the output file for the \p Count /// bytes. This returns the number of bytes written. It may return 0 if -/// the \arg Count is more than the maximum optimal nops. +/// the \p Count is more than the maximum optimal nops. bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { static const uint8_t Nops[10][10] = { // nop @@ -354,7 +354,7 @@ public: : ELFX86AsmBackend(T, OSABI, CPU) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createX86ELFObjectWriter(OS, /*Is64Bit*/ false, OSABI); + return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, ELF::EM_386); } }; @@ -364,7 +364,7 @@ public: : ELFX86AsmBackend(T, OSABI, CPU) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createX86ELFObjectWriter(OS, /*Is64Bit*/ true, OSABI); + return createX86ELFObjectWriter(OS, /*IsELF64*/ true, OSABI, ELF::EM_X86_64); } }; @@ -455,7 +455,7 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT, String if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) return new DarwinX86_32AsmBackend(T, CPU); - if (TheTriple.isOSWindows()) + if (TheTriple.isOSWindows() && TheTriple.getEnvironment() != Triple::ELF) return new WindowsX86AsmBackend(T, false, CPU); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); @@ -468,7 +468,7 @@ MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT, String if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) return new DarwinX86_64AsmBackend(T, CPU); - if (TheTriple.isOSWindows()) + if (TheTriple.isOSWindows() && TheTriple.getEnvironment() != Triple::ELF) return new WindowsX86AsmBackend(T, true, CPU); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index db597fb..7ea1961 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -276,9 +276,9 @@ namespace X86II { MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35, MRM_C4 = 36, MRM_C8 = 37, MRM_C9 = 38, MRM_E8 = 39, MRM_F0 = 40, MRM_F8 = 41, MRM_F9 = 42, MRM_D0 = 45, MRM_D1 = 46, - MRM_D4 = 47, MRM_D8 = 48, MRM_D9 = 49, MRM_DA = 50, - MRM_DB = 51, MRM_DC = 52, MRM_DD = 53, MRM_DE = 54, - MRM_DF = 55, + MRM_D4 = 47, MRM_D5 = 48, 
MRM_D8 = 49, MRM_D9 = 50, + MRM_DA = 51, MRM_DB = 52, MRM_DC = 53, MRM_DD = 54, + MRM_DE = 55, MRM_DF = 56, /// RawFrmImm8 - This is used for the ENTER instruction, which has two /// immediates, the first of which is a 16-bit immediate (specified by @@ -580,11 +580,11 @@ namespace X86II { case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8: case X86II::MRM_F9: case X86II::MRM_D0: case X86II::MRM_D1: - case X86II::MRM_D4: case X86II::MRM_D8: - case X86II::MRM_D9: case X86II::MRM_DA: - case X86II::MRM_DB: case X86II::MRM_DC: - case X86II::MRM_DD: case X86II::MRM_DE: - case X86II::MRM_DF: + case X86II::MRM_D4: case X86II::MRM_D5: + case X86II::MRM_D8: case X86II::MRM_D9: + case X86II::MRM_DA: case X86II::MRM_DB: + case X86II::MRM_DC: case X86II::MRM_DD: + case X86II::MRM_DE: case X86II::MRM_DF: return -1; } } diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index 5a42a80..de80dd8 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -20,7 +20,7 @@ using namespace llvm; namespace { class X86ELFObjectWriter : public MCELFObjectTargetWriter { public: - X86ELFObjectWriter(bool is64Bit, uint8_t OSABI); + X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine); virtual ~X86ELFObjectWriter(); protected: @@ -30,10 +30,11 @@ namespace { }; } -X86ELFObjectWriter::X86ELFObjectWriter(bool Is64Bit, uint8_t OSABI) - : MCELFObjectTargetWriter(Is64Bit, OSABI, - Is64Bit ? ELF::EM_X86_64 : ELF::EM_386, - /*HasRelocationAddend*/ Is64Bit) {} +X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, + uint16_t EMachine) + : MCELFObjectTargetWriter(IsELF64, OSABI, EMachine, + // Only i386 uses Rel instead of RelA. + /*HasRelocationAddend*/ EMachine != ELF::EM_386) {} X86ELFObjectWriter::~X86ELFObjectWriter() {} @@ -48,7 +49,7 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? 
MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); unsigned Type; - if (is64Bit()) { + if (getEMachine() == ELF::EM_X86_64) { if (IsPCRel) { switch ((unsigned)Fixup.getKind()) { default: llvm_unreachable("invalid fixup kind!"); @@ -130,7 +131,7 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, case FK_Data_1: Type = ELF::R_X86_64_8; break; } } - } else { + } else if (getEMachine() == ELF::EM_386) { if (IsPCRel) { switch ((unsigned)Fixup.getKind()) { default: llvm_unreachable("invalid fixup kind!"); @@ -210,15 +211,17 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, case FK_Data_1: Type = ELF::R_386_8; break; } } - } + } else + llvm_unreachable("Unsupported ELF machine type."); return Type; } MCObjectWriter *llvm::createX86ELFObjectWriter(raw_ostream &OS, - bool Is64Bit, - uint8_t OSABI) { + bool IsELF64, + uint8_t OSABI, + uint16_t EMachine) { MCELFObjectTargetWriter *MOTW = - new X86ELFObjectWriter(Is64Bit, OSABI); + new X86ELFObjectWriter(IsELF64, OSABI, EMachine); return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); } diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index b0acd7d..16488eb 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -34,6 +34,10 @@ AsmWriterFlavor("x86-asm-syntax", cl::init(ATT), clEnumValN(Intel, "intel", "Emit Intel-style assembly"), clEnumValEnd)); +static cl::opt<bool> +MarkedJTDataRegions("mark-data-regions", cl::init(false), + cl::desc("Mark code section jump table data regions."), + cl::Hidden); void X86MCAsmInfoDarwin::anchor() { } @@ -59,6 +63,7 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { SupportsDebugInformation = true; DwarfUsesInlineInfoSection = true; + UseDataRegionDirectives = MarkedJTDataRegions; // Exceptions handling ExceptionsType = ExceptionHandling::DwarfCFI; diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 4a38324..122204a 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -16,6 +16,7 @@ #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86FixupKinds.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" @@ -28,8 +29,8 @@ using namespace llvm; namespace { class X86MCCodeEmitter : public MCCodeEmitter { - X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT + X86MCCodeEmitter(const X86MCCodeEmitter &) LLVM_DELETED_FUNCTION; + void operator=(const X86MCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; const MCSubtargetInfo &STI; MCContext &Ctx; @@ -51,8 +52,8 @@ public: return (STI.getFeatureBits() & X86::Mode64Bit) == 0; } - static unsigned GetX86RegNum(const MCOperand &MO) { - return X86_MC::getX86RegNum(MO.getReg()); + unsigned GetX86RegNum(const MCOperand &MO) const { + return Ctx.getRegisterInfo().getEncodingValue(MO.getReg()) & 0x7; } // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range @@ -64,8 +65,8 @@ public: // VEX.VVVV => XMM9 => ~9 // // See table 4-35 of Intel AVX Programming Reference for details. 
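As a quick illustration of the VEX.VVVV convention restated in the comment above (a hypothetical standalone snippet, not the emitter itself): the 4-bit field holds the one's complement of the source register's encoding, so an unused field reads as all ones:

    #include <cassert>
    #include <cstdint>

    static uint8_t vexVVVV(unsigned regEncoding) {  // e.g. XMM9 -> 9
      return uint8_t(~regEncoding) & 0xf;           // inverted, low 4 bits
    }

    int main() {
      assert(vexVVVV(9) == 0x6);   // VEX.VVVV => XMM9 => ~9
      assert(vexVVVV(0) == 0xf);   // 0b1111 also encodes "no VVVV operand"
    }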
- static unsigned char getVEXRegisterEncoding(const MCInst &MI, - unsigned OpNum) { + unsigned char getVEXRegisterEncoding(const MCInst &MI, + unsigned OpNum) const { unsigned SrcReg = MI.getOperand(OpNum).getReg(); unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum)); if (X86II::isX86_64ExtendedReg(SrcReg)) @@ -560,15 +561,6 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, } - // Set the vector length to 256-bit if YMM0-YMM15 is used - for (unsigned i = 0; i != MI.getNumOperands(); ++i) { - if (!MI.getOperand(i).isReg()) - continue; - unsigned SrcReg = MI.getOperand(i).getReg(); - if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15) - VEX_L = 1; - } - // Classify VEX_B, VEX_4V, VEX_R, VEX_X unsigned NumOps = Desc.getNumOperands(); unsigned CurOp = 0; @@ -1129,13 +1121,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9: case X86II::MRM_D0: case X86II::MRM_D1: - case X86II::MRM_D4: case X86II::MRM_D8: - case X86II::MRM_D9: case X86II::MRM_DA: - case X86II::MRM_DB: case X86II::MRM_DC: - case X86II::MRM_DD: case X86II::MRM_DE: - case X86II::MRM_DF: case X86II::MRM_E8: - case X86II::MRM_F0: case X86II::MRM_F8: - case X86II::MRM_F9: + case X86II::MRM_D4: case X86II::MRM_D5: + case X86II::MRM_D8: case X86II::MRM_D9: + case X86II::MRM_DA: case X86II::MRM_DB: + case X86II::MRM_DC: case X86II::MRM_DD: + case X86II::MRM_DE: case X86II::MRM_DF: + case X86II::MRM_E8: case X86II::MRM_F0: + case X86II::MRM_F8: case X86II::MRM_F9: EmitByte(BaseOpcode, CurByte, OS); unsigned char MRM; @@ -1150,6 +1142,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRM_D0: MRM = 0xD0; break; case X86II::MRM_D1: MRM = 0xD1; break; case X86II::MRM_D4: MRM = 0xD4; break; + case X86II::MRM_D5: MRM = 0xD5; break; case X86II::MRM_D8: MRM = 0xD8; break; case X86II::MRM_D9: MRM = 0xD9; break; case X86II::MRM_DA: MRM = 0xDA; break; diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 3482363..287c9f1 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -209,117 +209,10 @@ unsigned X86_MC::getDwarfRegFlavour(StringRef TT, bool isEH) { return DWARFFlavour::X86_32_Generic; } -/// getX86RegNum - This function maps LLVM register identifiers to their X86 -/// specific numbering, which is used in various places encoding instructions. 
-unsigned X86_MC::getX86RegNum(unsigned RegNo) { - switch(RegNo) { - case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX; - case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX; - case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX; - case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX; - case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH: - return N86::ESP; - case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH: - return N86::EBP; - case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH: - return N86::ESI; - case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH: - return N86::EDI; - - case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B: - return N86::EAX; - case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B: - return N86::ECX; - case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B: - return N86::EDX; - case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B: - return N86::EBX; - case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B: - return N86::ESP; - case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B: - return N86::EBP; - case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B: - return N86::ESI; - case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B: - return N86::EDI; - - case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3: - case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7: - return RegNo-X86::ST0; - - case X86::XMM0: case X86::XMM8: - case X86::YMM0: case X86::YMM8: case X86::MM0: - return 0; - case X86::XMM1: case X86::XMM9: - case X86::YMM1: case X86::YMM9: case X86::MM1: - return 1; - case X86::XMM2: case X86::XMM10: - case X86::YMM2: case X86::YMM10: case X86::MM2: - return 2; - case X86::XMM3: case X86::XMM11: - case X86::YMM3: case X86::YMM11: case X86::MM3: - return 3; - case X86::XMM4: case X86::XMM12: - case X86::YMM4: case X86::YMM12: case X86::MM4: - return 4; - case X86::XMM5: case X86::XMM13: - case X86::YMM5: case X86::YMM13: case X86::MM5: - return 5; - case X86::XMM6: case X86::XMM14: - case X86::YMM6: case X86::YMM14: case X86::MM6: - return 6; - case X86::XMM7: case X86::XMM15: - case X86::YMM7: case X86::YMM15: case X86::MM7: - return 7; - - case X86::ES: return 0; - case X86::CS: return 1; - case X86::SS: return 2; - case X86::DS: return 3; - case X86::FS: return 4; - case X86::GS: return 5; - - case X86::CR0: case X86::CR8 : case X86::DR0: return 0; - case X86::CR1: case X86::CR9 : case X86::DR1: return 1; - case X86::CR2: case X86::CR10: case X86::DR2: return 2; - case X86::CR3: case X86::CR11: case X86::DR3: return 3; - case X86::CR4: case X86::CR12: case X86::DR4: return 4; - case X86::CR5: case X86::CR13: case X86::DR5: return 5; - case X86::CR6: case X86::CR14: case X86::DR6: return 6; - case X86::CR7: case X86::CR15: case X86::DR7: return 7; - - // Pseudo index registers are equivalent to a "none" - // scaled index (See Intel Manual 2A, table 2-3) - case X86::EIZ: - case X86::RIZ: - return 4; - - default: - assert((int(RegNo) > 0) && "Unknown physical register!"); - return 0; - } -} - void X86_MC::InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI) { // FIXME: TableGen these. 
for (unsigned Reg = X86::NoRegister+1; Reg < X86::NUM_TARGET_REGS; ++Reg) { - int SEH = X86_MC::getX86RegNum(Reg); - switch (Reg) { - case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B: - case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B: - case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B: - case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B: - case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B: - case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B: - case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B: - case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B: - case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: - case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: - case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11: - case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15: - SEH += 8; - break; - } + unsigned SEH = MRI->getEncodingValue(Reg); MRI->mapLLVMRegToSEHReg(Reg, SEH); } } @@ -379,11 +272,15 @@ static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) { MAI = new X86_64MCAsmInfoDarwin(TheTriple); else MAI = new X86MCAsmInfoDarwin(TheTriple); + } else if (TheTriple.getEnvironment() == Triple::ELF) { + // Force the use of an ELF container. + MAI = new X86ELFMCAsmInfo(TheTriple); } else if (TheTriple.getOS() == Triple::Win32) { MAI = new X86MCAsmInfoMicrosoft(TheTriple); } else if (TheTriple.getOS() == Triple::MinGW32 || TheTriple.getOS() == Triple::Cygwin) { MAI = new X86MCAsmInfoGNUCOFF(TheTriple); } else { + // The default is ELF. MAI = new X86ELFMCAsmInfo(TheTriple); } @@ -465,7 +362,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) return createMachOStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll); - if (TheTriple.isOSWindows()) + if (TheTriple.isOSWindows() && TheTriple.getEnvironment() != Triple::ELF) return createWinCOFFStreamer(Ctx, MAB, *_Emitter, _OS, RelaxAll); return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack); diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 4b0cace..981aa1a 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -64,8 +64,6 @@ namespace X86_MC { unsigned getDwarfRegFlavour(StringRef TT, bool isEH); - unsigned getX86RegNum(unsigned RegNo); - void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI); /// createX86MCSubtargetInfo - Create a X86 MCSubtargetInfo instance. @@ -91,8 +89,9 @@ MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS, /// createX86ELFObjectWriter - Construct an X86 ELF object writer. MCObjectWriter *createX86ELFObjectWriter(raw_ostream &OS, - bool Is64Bit, - uint8_t OSABI); + bool IsELF64, + uint8_t OSABI, + uint16_t EMachine); /// createX86WinCOFFObjectWriter - Construct an X86 Win COFF object writer. 
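The deletions above all lean on one replacement: the TableGen'd register descriptions now carry each register's hardware encoding, so MCRegisterInfo::getEncodingValue subsumes both the hand-maintained getX86RegNum switch and the SEH adjustment table. A sketch of the two uses, mirroring the calls visible in this change:

    #include "llvm/MC/MCRegisterInfo.h"

    // ModRM/SIB field: low 3 bits of the encoding (bit 3 goes in REX/VEX).
    static unsigned x86RegNum(const llvm::MCRegisterInfo &MRI, unsigned Reg) {
      return MRI.getEncodingValue(Reg) & 0x7;
    }

    // SEH register number: the full encoding (R8..R15, XMM8..15 map to 8..15).
    static unsigned sehRegNum(const llvm::MCRegisterInfo &MRI, unsigned Reg) {
      return MRI.getEncodingValue(Reg);
    }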
MCObjectWriter *createX86WinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit); } // End llvm namespace diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index f0f1982..7ff058e 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -11,11 +11,13 @@ #include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Object/MachOFormat.h" using namespace llvm; @@ -23,7 +25,7 @@ using namespace llvm::object; namespace { class X86MachObjectWriter : public MCMachObjectTargetWriter { - void RecordScatteredRelocation(MachObjectWriter *Writer, + bool RecordScatteredRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -335,7 +337,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, Writer->addRelocation(Fragment->getParent(), MRE); } -void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, +bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -381,6 +383,19 @@ void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, // Relocations are written out in reverse order, so the PAIR comes first. if (Type == macho::RIT_Difference || Type == macho::RIT_Generic_LocalDifference) { + // If the offset is too large to fit in a scattered relocation, + // we're hosed. It's an unfortunate limitation of the MachO format. + if (FixupOffset > 0xffffff) { + char Buffer[32]; + format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer)); + Asm.getContext().FatalError(Fixup.getLoc(), + Twine("Section too large, can't encode " + "r_address (") + Buffer + + ") into 24 bits of scattered " + "relocation entry."); + llvm_unreachable("fatal error returned?!"); + } + macho::RelocationEntry MRE; MRE.Word0 = ((0 << 0) | (macho::RIT_Pair << 24) | @@ -389,6 +404,16 @@ void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, macho::RF_Scattered); MRE.Word1 = Value2; Writer->addRelocation(Fragment->getParent(), MRE); + } else { + // If the offset is more than 24-bits, it won't fit in a scattered + // relocation offset field, so we fall back to using a non-scattered + // relocation. This is a bit risky, as if the offset reaches out of + // the block and the linker is doing scattered loading on this + // symbol, things can go badly. + // + // Required for 'as' compatibility. + if (FixupOffset > 0xffffff) + return false; } macho::RelocationEntry MRE; @@ -399,6 +424,7 @@ void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, macho::RF_Scattered); MRE.Word1 = Value; Writer->addRelocation(Fragment->getParent(), MRE); + return true; } void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer, @@ -469,9 +495,11 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, // If this is a difference or a defined symbol plus an offset, then we need a // scattered relocation entry. Differences always require scattered // relocations. 
- if (Target.getSymB()) - return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, - Target, Log2Size, FixedValue); + if (Target.getSymB()) { + RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue); + return; + } // Get the symbol data, if any. MCSymbolData *SD = 0; @@ -483,9 +511,13 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, uint32_t Offset = Target.getConstant(); if (IsPCRel) Offset += 1 << Log2Size; - if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD)) - return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, - Target, Log2Size, FixedValue); + // Try to record the scattered relocation if needed. Fall back to non + // scattered if necessary (see comments in RecordScatteredRelocation() + // for details). + if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD) && + RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue)) + return; // See <reloc.h>. uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td index 1249781..8ad0bc0 100644 --- a/contrib/llvm/lib/Target/X86/X86.td +++ b/contrib/llvm/lib/Target/X86/X86.td @@ -118,8 +118,13 @@ def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true", "Support BMI instructions">; def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true", "Support BMI2 instructions">; +def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true", + "Support RTM instructions">; def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; +def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb", + "HasSlowDivide", "true", + "Use small divide for positive values less than 256">; //===----------------------------------------------------------------------===// // X86 processors supported. 
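The 24-bit limit handled above comes from the Mach-O scattered relocation layout, where r_address shares one 32-bit word with the type, size, pc-rel, and scattered bits. A rough standalone model of the check and packing, with the field layout taken from the Word0 expression in this change:

    #include <cstdint>
    #include <optional>

    std::optional<uint32_t> scatteredWord0(uint32_t fixupOffset, unsigned type,
                                           unsigned log2Size, bool isPCRel) {
      if (fixupOffset > 0xffffff)       // r_address is only 24 bits wide:
        return std::nullopt;            // caller errors out or falls back
      return (fixupOffset << 0) | (uint32_t(type) << 24) |
             (uint32_t(log2Size) << 28) | (uint32_t(isPCRel) << 30) |
             0x80000000u;               // macho::RF_Scattered
    }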
@@ -159,8 +164,9 @@ def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>; -def : AtomProc<"atom", [ProcIntelAtom, FeatureSSE3, FeatureCMPXCHG16B, - FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP]>; +def : AtomProc<"atom", [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B, + FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, + FeatureSlowDivide]>; // "Arrandale" along with corei3 and corei5 def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, FeatureFastUAMem, @@ -188,7 +194,8 @@ def : Proc<"core-avx2", [FeatureAVX2, FeatureCMPXCHG16B, FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI, - FeatureBMI2, FeatureFMA]>; + FeatureBMI2, FeatureFMA, + FeatureRTM]>; def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [Feature3DNow]>; diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp index db71e27..fdd7125 100644 --- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "X86AsmPrinter.h" -#include "X86MCInstLower.h" #include "X86.h" #include "X86COFFMachineModuleInfo.h" #include "X86MachineFunctionInfo.h" @@ -206,10 +205,10 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, } } -/// print_pcrel_imm - This is used to print an immediate value that ends up +/// printPCRelImm - This is used to print an immediate value that ends up /// being encoded as a pc-relative value. These print slightly differently, for /// example, a $ is not emitted. -void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo, +void X86AsmPrinter::printPCRelImm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNo); switch (MO.getType()) { @@ -233,15 +232,17 @@ void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo, void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O, const char *Modifier) { + raw_ostream &O, const char *Modifier, + unsigned AsmVariant) { const MachineOperand &MO = MI->getOperand(OpNo); switch (MO.getType()) { default: llvm_unreachable("unknown operand type!"); case MachineOperand::MO_Register: { - O << '%'; + // FIXME: Enumerating AsmVariant, so we can remove magic number. + if (AsmVariant == 0) O << '%'; unsigned Reg = MO.getReg(); if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) { - EVT VT = (strcmp(Modifier+6,"64") == 0) ? + MVT::SimpleValueType VT = (strcmp(Modifier+6,"64") == 0) ? MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 : ((strcmp(Modifier+6,"16") == 0) ? 
MVT::i16 : MVT::i8)); Reg = getX86SubSuperRegister(Reg, VT); @@ -265,46 +266,6 @@ void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, } } -void X86AsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op, - raw_ostream &O) { - unsigned char value = MI->getOperand(Op).getImm(); - switch (value) { - default: llvm_unreachable("Invalid ssecc argument!"); - case 0: O << "eq"; break; - case 1: O << "lt"; break; - case 2: O << "le"; break; - case 3: O << "unord"; break; - case 4: O << "neq"; break; - case 5: O << "nlt"; break; - case 6: O << "nle"; break; - case 7: O << "ord"; break; - case 8: O << "eq_uq"; break; - case 9: O << "nge"; break; - case 0xa: O << "ngt"; break; - case 0xb: O << "false"; break; - case 0xc: O << "neq_oq"; break; - case 0xd: O << "ge"; break; - case 0xe: O << "gt"; break; - case 0xf: O << "true"; break; - case 0x10: O << "eq_os"; break; - case 0x11: O << "lt_oq"; break; - case 0x12: O << "le_oq"; break; - case 0x13: O << "unord_s"; break; - case 0x14: O << "neq_us"; break; - case 0x15: O << "nlt_uq"; break; - case 0x16: O << "nle_uq"; break; - case 0x17: O << "ord_s"; break; - case 0x18: O << "eq_us"; break; - case 0x19: O << "nge_uq"; break; - case 0x1a: O << "ngt_uq"; break; - case 0x1b: O << "false_os"; break; - case 0x1c: O << "neq_os"; break; - case 0x1d: O << "ge_oq"; break; - case 0x1e: O << "gt_oq"; break; - case 0x1f: O << "true_us"; break; - } -} - void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op, raw_ostream &O, const char *Modifier) { const MachineOperand &BaseReg = MI->getOperand(Op); @@ -363,10 +324,51 @@ void X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op, printLeaMemReference(MI, Op, O, Modifier); } -void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op, - raw_ostream &O) { - O << *MF->getPICBaseSymbol() << '\n'; - O << *MF->getPICBaseSymbol() << ':'; +void X86AsmPrinter::printIntelMemReference(const MachineInstr *MI, unsigned Op, + raw_ostream &O, const char *Modifier, + unsigned AsmVariant){ + const MachineOperand &BaseReg = MI->getOperand(Op); + unsigned ScaleVal = MI->getOperand(Op+1).getImm(); + const MachineOperand &IndexReg = MI->getOperand(Op+2); + const MachineOperand &DispSpec = MI->getOperand(Op+3); + const MachineOperand &SegReg = MI->getOperand(Op+4); + + // If this has a segment register, print it. + if (SegReg.getReg()) { + printOperand(MI, Op+4, O, Modifier, AsmVariant); + O << ':'; + } + + O << '['; + + bool NeedPlus = false; + if (BaseReg.getReg()) { + printOperand(MI, Op, O, Modifier, AsmVariant); + NeedPlus = true; + } + + if (IndexReg.getReg()) { + if (NeedPlus) O << " + "; + if (ScaleVal != 1) + O << ScaleVal << '*'; + printOperand(MI, Op+2, O, Modifier, AsmVariant); + NeedPlus = true; + } + + assert (DispSpec.isImm() && "Displacement is not an immediate!"); + int64_t DispVal = DispSpec.getImm(); + if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) { + if (NeedPlus) { + if (DispVal > 0) + O << " + "; + else { + O << " - "; + DispVal = -DispVal; + } + } + O << DispVal; + } + O << ']'; } bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, @@ -457,7 +459,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, return false; case 'P': // This is the operand of a call, treat specially. - print_pcrel_imm(MI, OpNo, O); + printPCRelImm(MI, OpNo, O); return false; case 'n': // Negate the immediate or print a '-' before the operand. 
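printIntelMemReference above assembles the classic Intel-syntax operand; an illustrative re-implementation over plain strings (a hypothetical helper, same plus/minus rules) shows the shape it produces, e.g. fs:[esi + 4*ecx - 16]:

    #include <string>

    std::string intelMem(const std::string &seg, const std::string &base,
                         unsigned scale, const std::string &index, long disp) {
      std::string s = seg.empty() ? std::string() : seg + ":";
      s += '[';
      bool needPlus = false;
      if (!base.empty()) { s += base; needPlus = true; }
      if (!index.empty()) {
        if (needPlus) s += " + ";
        if (scale != 1) s += std::to_string(scale) + '*';
        s += index;
        needPlus = true;
      }
      if (disp != 0 || !needPlus) {     // a bare displacement prints even if 0
        if (needPlus) {
          s += disp >= 0 ? " + " : " - ";
          disp = disp < 0 ? -disp : disp;
        }
        s += std::to_string(disp);
      }
      return s + ']';
    }
    // intelMem("fs", "esi", 4, "ecx", -16) == "fs:[esi + 4*ecx - 16]"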
@@ -471,7 +473,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, } } - printOperand(MI, OpNo, O); + printOperand(MI, OpNo, O, /*Modifier*/ 0, AsmVariant); return false; } @@ -479,6 +481,11 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { + if (AsmVariant) { + printIntelMemReference(MI, OpNo, O); + return false; + } + if (ExtraCode && ExtraCode[0]) { if (ExtraCode[1] != 0) return true; // Unknown modifier. @@ -680,7 +687,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); if (!Stubs.empty()) { OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { OutStreamer.EmitLabel(Stubs[i].first); diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.h b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h index 35386cd..61eb14e 100644 --- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.h +++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h @@ -34,47 +34,48 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { Subtarget = &TM.getSubtarget<X86Subtarget>(); } - virtual const char *getPassName() const { + virtual const char *getPassName() const LLVM_OVERRIDE { return "X86 AT&T-Style Assembly Printer"; } const X86Subtarget &getSubtarget() const { return *Subtarget; } - virtual void EmitStartOfAsmFile(Module &M); + virtual void EmitStartOfAsmFile(Module &M) LLVM_OVERRIDE; - virtual void EmitEndOfAsmFile(Module &M); + virtual void EmitEndOfAsmFile(Module &M) LLVM_OVERRIDE; - virtual void EmitInstruction(const MachineInstr *MI); + virtual void EmitInstruction(const MachineInstr *MI) LLVM_OVERRIDE; void printSymbolOperand(const MachineOperand &MO, raw_ostream &O); // These methods are used by the tablegen'erated instruction printer. 
void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O, - const char *Modifier = 0); - void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); + const char *Modifier = 0, unsigned AsmVariant = 0); + void printPCRelImm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O); - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &OS); - bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &OS); - - void printMachineInstruction(const MachineInstr *MI); - void printSSECC(const MachineInstr *MI, unsigned Op, raw_ostream &O); + virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS) LLVM_OVERRIDE; + virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS) LLVM_OVERRIDE; + void printMemReference(const MachineInstr *MI, unsigned Op, raw_ostream &O, const char *Modifier=NULL); void printLeaMemReference(const MachineInstr *MI, unsigned Op, raw_ostream &O, const char *Modifier=NULL); - void printPICLabel(const MachineInstr *MI, unsigned Op, raw_ostream &O); + void printIntelMemReference(const MachineInstr *MI, unsigned Op, + raw_ostream &O, const char *Modifier=NULL, + unsigned AsmVariant = 1); - bool runOnMachineFunction(MachineFunction &F); + virtual bool runOnMachineFunction(MachineFunction &F) LLVM_OVERRIDE; void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); - MachineLocation getDebugValueLocation(const MachineInstr *MI) const; + virtual MachineLocation + getDebugValueLocation(const MachineInstr *MI) const LLVM_OVERRIDE; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h b/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h index 471eb31..a5a8dc1 100644 --- a/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h +++ b/contrib/llvm/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -20,7 +20,7 @@ namespace llvm { class X86MachineFunctionInfo; - class TargetData; + class DataLayout; /// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation /// for X86 COFF targets. diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm/lib/Target/X86/X86CallingConv.td index a6d2709..6786756 100644 --- a/contrib/llvm/lib/Target/X86/X86CallingConv.td +++ b/contrib/llvm/lib/Target/X86/X86CallingConv.td @@ -88,6 +88,21 @@ def RetCC_X86_32_Fast : CallingConv<[ CCDelegateTo<RetCC_X86Common> ]>; +// Intel_OCL_BI return-value convention. +def RetCC_Intel_OCL_BI : CallingConv<[ + // Vector types are returned in XMM0,XMM1,XMMM2 and XMM3. + CCIfType<[f32, f64, v4i32, v2i64, v4f32, v2f64], + CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>, + + // 256-bit FP vectors + // No more than 4 registers + CCIfType<[v8f32, v4f64, v8i32, v4i64], + CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>, + + // i32, i64 in the standard way + CCDelegateTo<RetCC_X86Common> +]>; + // X86-64 C return-value convention. def RetCC_X86_64_C : CallingConv<[ // The X86-64 calling convention always returns FP values in XMM0. @@ -128,6 +143,10 @@ def RetCC_X86_64 : CallingConv<[ // This is the return-value convention used for the entire X86 backend. 
def RetCC_X86 : CallingConv<[ + + // Check if this is the Intel OpenCL built-ins calling convention + CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<RetCC_Intel_OCL_BI>>, + CCIfSubtarget<"is64Bit()", CCDelegateTo<RetCC_X86_64>>, CCDelegateTo<RetCC_X86_32> ]>; @@ -235,6 +254,29 @@ def CC_X86_Win64_C : CallingConv<[ CCIfType<[f80], CCAssignToStack<0, 0>> ]>; +// X86-64 Intel OpenCL built-ins calling convention. +def CC_Intel_OCL_BI : CallingConv<[ + CCIfType<[i32], CCIfSubtarget<"isTargetWin32()", CCAssignToStack<4, 4>>>, + + CCIfType<[i32], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[ECX, EDX, R8D, R9D]>>>, + CCIfType<[i64], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[RCX, RDX, R8, R9 ]>>>, + + CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX]>>, + CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX]>>, + + // The SSE vector arguments are passed in XMM registers. + CCIfType<[f32, f64, v4i32, v2i64, v4f32, v2f64], + CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>, + + // The 256-bit vector arguments are passed in YMM registers. + CCIfType<[v8f32, v4f64, v8i32, v4i64], + CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>, + + CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>, + CCDelegateTo<CC_X86_64_C> +]>; + + def CC_X86_64_GHC : CallingConv<[ // Promote i8/i16/i32 arguments to i64. CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, @@ -324,7 +366,7 @@ def CC_X86_32_FastCall : CallingConv<[ CCIfNest<CCAssignToReg<[EAX]>>, // The first 2 integer arguments are passed in ECX/EDX - CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>, + CCIfInReg<CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>>, // Otherwise, same as everything else. CCDelegateTo<CC_X86_32_Common> @@ -408,6 +450,7 @@ def CC_X86_64 : CallingConv<[ // This is the argument convention used for the entire X86 backend. 
def CC_X86 : CallingConv<[ + CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<CC_Intel_OCL_BI>>, CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>, CCDelegateTo<CC_X86_32> ]>; @@ -426,3 +469,17 @@ def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>; def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15, (sequence "XMM%u", 6, 15))>; + + +// Standard C + YMM6-15 +def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, + R13, R14, R15, + (sequence "YMM%u", 6, 15))>; + +//Standard C + XMM 8-15 +def CSR_64_Intel_OCL_BI : CalleeSavedRegs<(add CSR_64, + (sequence "XMM%u", 8, 15))>; + +//Standard C + YMM 8-15 +def CSR_64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add CSR_64, + (sequence "YMM%u", 8, 15))>; diff --git a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp index d705049..44db563 100644 --- a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp +++ b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp @@ -26,7 +26,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Function.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" @@ -43,7 +42,7 @@ namespace { template<class CodeEmitter> class Emitter : public MachineFunctionPass { const X86InstrInfo *II; - const TargetData *TD; + const DataLayout *TD; X86TargetMachine &TM; CodeEmitter &MCE; MachineModuleInfo *MMI; @@ -57,7 +56,7 @@ namespace { MCE(mce), PICBaseOffset(0), Is64BitMode(false), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} Emitter(X86TargetMachine &tm, CodeEmitter &mce, - const X86InstrInfo &ii, const TargetData &td, bool is64) + const X86InstrInfo &ii, const DataLayout &td, bool is64) : MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm), MCE(mce), PICBaseOffset(0), Is64BitMode(is64), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} @@ -110,6 +109,14 @@ namespace { void emitMemModRMByte(const MachineInstr &MI, unsigned Op, unsigned RegOpcodeField, intptr_t PCAdj = 0); + + unsigned getX86RegNum(unsigned RegNo) const { + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + return TRI->getEncodingValue(RegNo) & 0x7; + } + + unsigned char getVEXRegisterEncoding(const MachineInstr &MI, + unsigned OpNum) const; }; template<class CodeEmitter> @@ -129,13 +136,12 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { MCE.setModuleInfo(MMI); II = TM.getInstrInfo(); - TD = TM.getTargetData(); + TD = TM.getDataLayout(); Is64BitMode = TM.getSubtarget<X86Subtarget>().is64Bit(); IsPIC = TM.getRelocationModel() == Reloc::PIC_; do { - DEBUG(dbgs() << "JITTing function '" - << MF.getFunction()->getName() << "'\n"); + DEBUG(dbgs() << "JITTing function '" << MF.getName() << "'\n"); MCE.startFunction(MF); for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { @@ -365,7 +371,7 @@ inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, template<class CodeEmitter> void Emitter<CodeEmitter>::emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeFld){ - MCE.emitByte(ModRMByte(3, RegOpcodeFld, X86_MC::getX86RegNum(ModRMReg))); + MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg))); } template<class CodeEmitter> @@ -503,7 +509,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, // 2-7) and absolute references. 
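For reference while reading emitRegModRMByte/emitMemModRMByte below: the two encoding bytes pack as mod:reg/opcode:r/m and scale:index:base (2-3-3 bits, per the Intel manual tables the comments cite). A compile-time sketch of the packing the ModRMByte helper performs:

    #include <cstdint>

    constexpr uint8_t modRM(unsigned mod, unsigned regOp, unsigned rm) {
      return uint8_t((mod & 3) << 6 | (regOp & 7) << 3 | (rm & 7));
    }
    constexpr uint8_t sib(unsigned ss, unsigned index, unsigned base) {
      return uint8_t((ss & 3) << 6 | (index & 7) << 3 | (base & 7));  // ss = log2(scale)
    }
    static_assert(modRM(3, 2, 1) == 0xD1, "register-direct form, /2, rm=1");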
unsigned BaseRegNo = -1U; if (BaseReg != 0 && BaseReg != X86::RIP) - BaseRegNo = X86_MC::getX86RegNum(BaseReg); + BaseRegNo = getX86RegNum(BaseReg); if (// The SIB byte must be used if there is an index register. IndexReg.getReg() == 0 && @@ -579,15 +585,15 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, // Manual 2A, table 2-7. The displacement has already been output. unsigned IndexRegNo; if (IndexReg.getReg()) - IndexRegNo = X86_MC::getX86RegNum(IndexReg.getReg()); + IndexRegNo = getX86RegNum(IndexReg.getReg()); else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5) IndexRegNo = 4; emitSIBByte(SS, IndexRegNo, 5); } else { - unsigned BaseRegNo = X86_MC::getX86RegNum(BaseReg); + unsigned BaseRegNo = getX86RegNum(BaseReg); unsigned IndexRegNo; if (IndexReg.getReg()) - IndexRegNo = X86_MC::getX86RegNum(IndexReg.getReg()); + IndexRegNo = getX86RegNum(IndexReg.getReg()); else IndexRegNo = 4; // For example [ESP+1*<noreg>+4] emitSIBByte(SS, IndexRegNo, BaseRegNo); @@ -758,10 +764,12 @@ void Emitter<CodeEmitter>::emitOpcodePrefix(uint64_t TSFlags, // VEX.VVVV => XMM9 => ~9 // // See table 4-35 of Intel AVX Programming Reference for details. -static unsigned char getVEXRegisterEncoding(const MachineInstr &MI, - unsigned OpNum) { +template<class CodeEmitter> +unsigned char +Emitter<CodeEmitter>::getVEXRegisterEncoding(const MachineInstr &MI, + unsigned OpNum) const { unsigned SrcReg = MI.getOperand(OpNum).getReg(); - unsigned SrcRegNum = X86_MC::getX86RegNum(MI.getOperand(OpNum).getReg()); + unsigned SrcRegNum = getX86RegNum(MI.getOperand(OpNum).getReg()); if (X86II::isX86_64ExtendedReg(SrcReg)) SrcRegNum |= 8; @@ -923,17 +931,6 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, } - // Set the vector length to 256-bit if YMM0-YMM15 is used - for (unsigned i = 0; i != MI.getNumOperands(); ++i) { - if (!MI.getOperand(i).isReg()) - continue; - if (MI.getOperand(i).isImplicit()) - continue; - unsigned SrcReg = MI.getOperand(i).getReg(); - if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15) - VEX_L = 1; - } - // Classify VEX_B, VEX_4V, VEX_R, VEX_X unsigned NumOps = Desc->getNumOperands(); unsigned CurOp = 0; @@ -1248,7 +1245,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, case X86II::AddRegFrm: { MCE.emitByte(BaseOpcode + - X86_MC::getX86RegNum(MI.getOperand(CurOp++).getReg())); + getX86RegNum(MI.getOperand(CurOp++).getReg())); if (CurOp == NumOps) break; @@ -1283,7 +1280,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, case X86II::MRMDestReg: { MCE.emitByte(BaseOpcode); emitRegModRMByte(MI.getOperand(CurOp).getReg(), - X86_MC::getX86RegNum(MI.getOperand(CurOp+1).getReg())); + getX86RegNum(MI.getOperand(CurOp+1).getReg())); CurOp += 2; break; } @@ -1294,7 +1291,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) SrcRegNum++; emitMemModRMByte(MI, CurOp, - X86_MC::getX86RegNum(MI.getOperand(SrcRegNum).getReg())); + getX86RegNum(MI.getOperand(SrcRegNum).getReg())); CurOp = SrcRegNum + 1; break; } @@ -1310,7 +1307,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, ++SrcRegNum; emitRegModRMByte(MI.getOperand(SrcRegNum).getReg(), - X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg())); + getX86RegNum(MI.getOperand(CurOp).getReg())); // 2 operands skipped with HasMemOp4, compensate accordingly CurOp = HasMemOp4 ? 
SrcRegNum : SrcRegNum + 1; if (HasVEX_4VOp3) @@ -1332,7 +1329,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ? X86II::getSizeOfImm(Desc->TSFlags) : 0; emitMemModRMByte(MI, FirstMemOp, - X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj); + getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj); CurOp += AddrOperands + 1; if (HasVEX_4VOp3) ++CurOp; @@ -1422,7 +1419,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, MCE.emitByte(BaseOpcode); // Duplicate register, used by things like MOV8r0 (aka xor reg,reg). emitRegModRMByte(MI.getOperand(CurOp).getReg(), - X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg())); + getX86RegNum(MI.getOperand(CurOp).getReg())); ++CurOp; break; @@ -1455,7 +1452,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, const MachineOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand : CurOp); ++CurOp; - unsigned RegNum = X86_MC::getX86RegNum(MO.getReg()) << 4; + unsigned RegNum = getX86RegNum(MO.getReg()) << 4; if (X86II::isX86_64ExtendedReg(MO.getReg())) RegNum |= 1 << 7; // If there is an additional 5th operand it must be an immediate, which diff --git a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp deleted file mode 100644 index c1a49a7..0000000 --- a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp +++ /dev/null @@ -1,147 +0,0 @@ -//===-- X86ELFWriterInfo.cpp - ELF Writer Info for the X86 backend --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF writer information for the X86 backend. -// -//===----------------------------------------------------------------------===// - -#include "X86ELFWriterInfo.h" -#include "X86Relocations.h" -#include "llvm/Function.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" - -using namespace llvm; - -//===----------------------------------------------------------------------===// -// Implementation of the X86ELFWriterInfo class -//===----------------------------------------------------------------------===// - -X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_) - : TargetELFWriterInfo(is64Bit_, isLittleEndian_) { - EMachine = is64Bit ? 
EM_X86_64 : EM_386; - } - -X86ELFWriterInfo::~X86ELFWriterInfo() {} - -unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { - if (is64Bit) { - switch(MachineRelTy) { - case X86::reloc_pcrel_word: - return ELF::R_X86_64_PC32; - case X86::reloc_absolute_word: - return ELF::R_X86_64_32; - case X86::reloc_absolute_word_sext: - return ELF::R_X86_64_32S; - case X86::reloc_absolute_dword: - return ELF::R_X86_64_64; - case X86::reloc_picrel_word: - default: - llvm_unreachable("unknown x86_64 machine relocation type"); - } - } else { - switch(MachineRelTy) { - case X86::reloc_pcrel_word: - return ELF::R_386_PC32; - case X86::reloc_absolute_word: - return ELF::R_386_32; - case X86::reloc_absolute_word_sext: - case X86::reloc_absolute_dword: - case X86::reloc_picrel_word: - default: - llvm_unreachable("unknown x86 machine relocation type"); - } - } -} - -long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, - long int Modifier) const { - if (is64Bit) { - switch(RelTy) { - case ELF::R_X86_64_PC32: return Modifier - 4; - case ELF::R_X86_64_32: - case ELF::R_X86_64_32S: - case ELF::R_X86_64_64: - return Modifier; - default: - llvm_unreachable("unknown x86_64 relocation type"); - } - } else { - switch(RelTy) { - case ELF::R_386_PC32: return Modifier - 4; - case ELF::R_386_32: return Modifier; - default: - llvm_unreachable("unknown x86 relocation type"); - } - } -} - -unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const { - if (is64Bit) { - switch(RelTy) { - case ELF::R_X86_64_PC32: - case ELF::R_X86_64_32: - case ELF::R_X86_64_32S: - return 32; - case ELF::R_X86_64_64: - return 64; - default: - llvm_unreachable("unknown x86_64 relocation type"); - } - } else { - switch(RelTy) { - case ELF::R_386_PC32: - case ELF::R_386_32: - return 32; - default: - llvm_unreachable("unknown x86 relocation type"); - } - } -} - -bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { - if (is64Bit) { - switch(RelTy) { - case ELF::R_X86_64_PC32: - return true; - case ELF::R_X86_64_32: - case ELF::R_X86_64_32S: - case ELF::R_X86_64_64: - return false; - default: - llvm_unreachable("unknown x86_64 relocation type"); - } - } else { - switch(RelTy) { - case ELF::R_386_PC32: - return true; - case ELF::R_386_32: - return false; - default: - llvm_unreachable("unknown x86 relocation type"); - } - } -} - -unsigned X86ELFWriterInfo::getAbsoluteLabelMachineRelTy() const { - return is64Bit ? - X86::reloc_absolute_dword : X86::reloc_absolute_word; -} - -long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset, - unsigned RelOffset, - unsigned RelTy) const { - - if (RelTy == ELF::R_X86_64_PC32 || RelTy == ELF::R_386_PC32) - return SymOffset - (RelOffset + 4); - - llvm_unreachable("computeRelocation unknown for this relocation type"); -} diff --git a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h deleted file mode 100644 index a45b5bb..0000000 --- a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h +++ /dev/null @@ -1,59 +0,0 @@ -//===-- X86ELFWriterInfo.h - ELF Writer Info for X86 ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF writer information for the X86 backend. 
-// -//===----------------------------------------------------------------------===// - -#ifndef X86_ELF_WRITER_INFO_H -#define X86_ELF_WRITER_INFO_H - -#include "llvm/Target/TargetELFWriterInfo.h" - -namespace llvm { - - class X86ELFWriterInfo : public TargetELFWriterInfo { - - public: - X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_); - virtual ~X86ELFWriterInfo(); - - /// getRelocationType - Returns the target specific ELF Relocation type. - /// 'MachineRelTy' contains the object code independent relocation type - virtual unsigned getRelocationType(unsigned MachineRelTy) const; - - /// hasRelocationAddend - True if the target uses an addend in the - /// ELF relocation entry. - virtual bool hasRelocationAddend() const { return is64Bit ? true : false; } - - /// getDefaultAddendForRelTy - Gets the default addend value for a - /// relocation entry based on the target ELF relocation type. - virtual long int getDefaultAddendForRelTy(unsigned RelTy, - long int Modifier = 0) const; - - /// getRelTySize - Returns the size of relocatable field in bits - virtual unsigned getRelocationTySize(unsigned RelTy) const; - - /// isPCRelativeRel - True if the relocation type is pc relative - virtual bool isPCRelativeRel(unsigned RelTy) const; - - /// getJumpTableRelocationTy - Returns the machine relocation type used - /// to reference a jumptable. - virtual unsigned getAbsoluteLabelMachineRelTy() const; - - /// computeRelocation - Some relocatable fields could be relocated - /// directly, avoiding the relocation symbol emission, compute the - /// final relocation value for this symbol. - virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset, - unsigned RelTy) const; - }; - -} // end llvm namespace - -#endif // X86_ELF_WRITER_INFO_H diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp index e5952aa..d4627c7 100644 --- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp +++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp @@ -45,9 +45,9 @@ class X86FastISel : public FastISel { /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; - /// StackPtr - Register used as the stack pointer. + /// RegInfo - X86 register info. /// - unsigned StackPtr; + const X86RegisterInfo *RegInfo; /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 /// floating point ops. @@ -61,9 +61,9 @@ public: const TargetLibraryInfo *libInfo) : FastISel(funcInfo, libInfo) { Subtarget = &TM.getSubtarget<X86Subtarget>(); - StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf32 = Subtarget->hasSSE1(); + RegInfo = static_cast<const X86RegisterInfo*>(TM.getRegisterInfo()); } virtual bool TargetSelectInstruction(const Instruction *I); @@ -710,6 +710,8 @@ bool X86FastISel::X86SelectStore(const Instruction *I) { bool X86FastISel::X86SelectRet(const Instruction *I) { const ReturnInst *Ret = cast<ReturnInst>(I); const Function &F = *I->getParent()->getParent(); + const X86MachineFunctionInfo *X86MFInfo = + FuncInfo.MF->getInfo<X86MachineFunctionInfo>(); if (!FuncInfo.CanLowerReturn) return false; @@ -724,8 +726,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { return false; // Don't handle popping bytes on return for now. 
- if (FuncInfo.MF->getInfo<X86MachineFunctionInfo>() - ->getBytesToPopOnReturn() != 0) + if (X86MFInfo->getBytesToPopOnReturn() != 0) return 0; // fastcc with -tailcallopt is intended to provide a guaranteed @@ -809,6 +810,19 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { MRI.addLiveOut(VA.getLocReg()); } + // The x86-64 ABI for returning structs by value requires that we copy + // the sret argument into %rax for the return. We saved the argument into + // a virtual register in the entry block, so now we copy the value out + // and into %rax. + if (Subtarget->is64Bit() && F.hasStructRetAttr()) { + unsigned Reg = X86MFInfo->getSRetReturnReg(); + assert(Reg && + "SRetReturnReg should have been set in LowerFormalArguments()!"); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + X86::RAX).addReg(Reg); + MRI.addLiveOut(X86::RAX); + } + // Now emit the RET. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET)); return true; @@ -1527,9 +1541,9 @@ static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget, CallingConv::ID CC = CS.getCallingConv(); if (CC == CallingConv::Fast || CC == CallingConv::GHC) return 0; - if (!CS.paramHasAttr(1, Attribute::StructRet)) + if (!CS.paramHasAttr(1, Attributes::StructRet)) return 0; - if (CS.paramHasAttr(1, Attribute::InReg)) + if (CS.paramHasAttr(1, Attributes::InReg)) return 0; return 4; } @@ -1608,12 +1622,12 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { Value *ArgVal = *i; ISD::ArgFlagsTy Flags; unsigned AttrInd = i - CS.arg_begin() + 1; - if (CS.paramHasAttr(AttrInd, Attribute::SExt)) + if (CS.paramHasAttr(AttrInd, Attributes::SExt)) Flags.setSExt(); - if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) + if (CS.paramHasAttr(AttrInd, Attributes::ZExt)) Flags.setZExt(); - if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) { + if (CS.paramHasAttr(AttrInd, Attributes::ByVal)) { PointerType *Ty = cast<PointerType>(ArgVal->getType()); Type *ElementTy = Ty->getElementType(); unsigned FrameSize = TD.getTypeAllocSize(ElementTy); @@ -1627,9 +1641,9 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { return false; } - if (CS.paramHasAttr(AttrInd, Attribute::InReg)) + if (CS.paramHasAttr(AttrInd, Attributes::InReg)) Flags.setInReg(); - if (CS.paramHasAttr(AttrInd, Attribute::Nest)) + if (CS.paramHasAttr(AttrInd, Attributes::Nest)) Flags.setNest(); // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra @@ -1771,7 +1785,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { } else { unsigned LocMemOffset = VA.getLocMemOffset(); X86AddressMode AM; - AM.Base.Reg = StackPtr; + AM.Base.Reg = RegInfo->getStackRegister(); AM.Disp = LocMemOffset; const Value *ArgVal = ArgVals[VA.getValNo()]; ISD::ArgFlagsTy Flags = ArgFlags[VA.getValNo()]; @@ -1897,11 +1911,11 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { ISD::InputArg MyFlags; MyFlags.VT = RegisterVT.getSimpleVT(); MyFlags.Used = !CS.getInstruction()->use_empty(); - if (CS.paramHasAttr(0, Attribute::SExt)) + if (CS.paramHasAttr(0, Attributes::SExt)) MyFlags.Flags.setSExt(); - if (CS.paramHasAttr(0, Attribute::ZExt)) + if (CS.paramHasAttr(0, Attributes::ZExt)) MyFlags.Flags.setZExt(); - if (CS.paramHasAttr(0, Attribute::InReg)) + if (CS.paramHasAttr(0, Attributes::InReg)) MyFlags.Flags.setInReg(); Ins.push_back(MyFlags); } @@ -2014,13 +2028,17 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) { unsigned 
X86FastISel::TargetMaterializeConstant(const Constant *C) { MVT VT; if (!isTypeLegal(C->getType(), VT)) - return false; + return 0; + + // Can't handle alternate code models yet. + if (TM.getCodeModel() != CodeModel::Small) + return 0; // Get opcode and regclass of the output for the given load instruction. unsigned Opc = 0; const TargetRegisterClass *RC = NULL; switch (VT.SimpleTy) { - default: return false; + default: return 0; case MVT::i8: Opc = X86::MOV8rm; RC = &X86::GR8RegClass; break; case MVT::i16: Opc = X86::MOV16rm; RC = &X86::GR16RegClass; break; @@ -2058,7 +2076,7 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { break; case MVT::f80: // No f80 support yet. - return false; + return 0; } // Materialize addresses with LEA instructions. diff --git a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp index 955c75a..791f598 100644 --- a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp +++ b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp @@ -171,6 +171,7 @@ namespace { // Shuffle live registers to match the expectations of successor blocks. void finishBlockStack(); +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void dumpStack() const { dbgs() << "Stack contents:"; for (unsigned i = 0; i != StackTop; ++i) { @@ -181,6 +182,7 @@ namespace { dbgs() << ", ST" << i << " in FP" << unsigned(PendingST[i]); dbgs() << "\n"; } +#endif /// getSlot - Return the stack slot number a particular register number is /// in. @@ -575,8 +577,8 @@ namespace { friend bool operator<(const TableEntry &TE, unsigned V) { return TE.from < V; } - friend bool LLVM_ATTRIBUTE_USED operator<(unsigned V, - const TableEntry &TE) { + friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V, + const TableEntry &TE) { return V < TE.from; } }; diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp index 2238688..369589d 100644 --- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -25,7 +25,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" #include "llvm/ADT/SmallSet.h" @@ -313,11 +313,11 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, if (CSI.empty()) return; std::vector<MachineMove> &Moves = MMI.getFrameMoves(); - const TargetData *TD = TM.getTargetData(); + const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); bool HasFP = hasFP(MF); // Calculate the number of bytes used to store the return address. - int stackGrowth = -TD->getPointerSize(); + int stackGrowth = -RegInfo->getSlotSize(); // FIXME: This is a dirty hack. The code itself is a pretty mess right now. // It should be rewritten from scratch and generalized someday. @@ -674,7 +674,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // function, and use up to 128 bytes of stack space, don't have a frame // pointer, calls, or dynamic alloca, then we do not need to adjust the // stack pointer (we fit in the Red Zone). - if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && + if (Is64Bit && !Fn->getFnAttributes().hasAttribute(Attributes::NoRedZone) && !RegInfo->needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls.
@@ -715,9 +715,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // ELSE => DW_CFA_offset_extended std::vector<MachineMove> &Moves = MMI.getFrameMoves(); - const TargetData *TD = MF.getTarget().getTargetData(); uint64_t NumBytes = 0; - int stackGrowth = -TD->getPointerSize(); + int stackGrowth = -SlotSize; if (HasFP) { // Calculate required stack adjustment. @@ -836,8 +835,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { MI->getOperand(3).setIsDead(); } - DL = MBB.findDebugLoc(MBBI); - // If there is an SUB32ri of ESP immediately before this instruction, merge // the two. This can be the case when tail call elimination is enabled and // the callee has more arguments than the caller. diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 27195b4..99f5574 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -100,6 +100,7 @@ namespace { Base_Reg = Reg; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void dump() { dbgs() << "X86ISelAddressMode " << this << '\n'; dbgs() << "Base_Reg "; @@ -133,6 +134,7 @@ namespace { dbgs() << "nul"; dbgs() << " JT" << JT << " Align" << Align << '\n'; } +#endif }; } @@ -189,7 +191,6 @@ namespace { SDNode *Select(SDNode *N); SDNode *SelectGather(SDNode *N, unsigned Opc); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); - SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT); SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT); bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM); @@ -244,13 +245,15 @@ namespace { else if (AM.CP) Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp, AM.SymbolFlags); - else if (AM.ES) + else if (AM.ES) { + assert(!AM.Disp && "Non-zero displacement is ignored with ES."); Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags); - else if (AM.JT != -1) + } else if (AM.JT != -1) { + assert(!AM.Disp && "Non-zero displacement is ignored with JT."); Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags); - else if (AM.BlockAddr) - Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32, - true, AM.SymbolFlags); + } else if (AM.BlockAddr) + Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp, + AM.SymbolFlags); else Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32); @@ -359,7 +362,7 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { /// MoveBelowOrigChain - Replace the original chain operand of the call with /// the load's chain operand and move the load below the call's chain operand.
static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, - SDValue Call, SDValue OrigChain) { + SDValue Call, SDValue OrigChain) { SmallVector<SDValue, 8> Ops; SDValue Chain = OrigChain.getOperand(0); if (Chain.getNode() == Load.getNode()) @@ -383,11 +386,13 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size()); CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0), Load.getOperand(1), Load.getOperand(2)); + + unsigned NumOps = Call.getNode()->getNumOperands(); Ops.clear(); Ops.push_back(SDValue(Load.getNode(), 1)); - for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i) + for (unsigned i = 1, e = NumOps; i != e; ++i) Ops.push_back(Call.getOperand(i)); - CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], Ops.size()); + CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], NumOps); } /// isCalleeLoad - Return true if call address is a load and it can be @@ -396,6 +401,10 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, /// In the case of a tail call, there isn't a callseq node between the call /// chain and the load. static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { + // The transformation is somewhat dangerous if the call's chain was glued to + // the call. After MoveBelowOrigChain the load is moved between the call and + // the chain; this can create a cycle if the load is not folded. So it is + // *really* important that we are sure the load will be folded. if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse()) return false; LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode()); @@ -425,7 +434,8 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { void X86DAGToDAGISel::PreprocessISelDAG() { // OptForSize is used in pattern predicates that isel is matching. - OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize); + OptForSize = MF->getFunction()->getFnAttributes(). + hasAttribute(Attributes::OptimizeForSize); for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ) { @@ -433,7 +443,10 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (OptLevel != CodeGenOpt::None && (N->getOpcode() == X86ISD::CALL || - N->getOpcode() == X86ISD::TC_RETURN)) { + (N->getOpcode() == X86ISD::TC_RETURN && + // Only do this if the load can be folded into TC_RETURN. + (Subtarget->is64Bit() || + getTargetMachine().getRelocationModel() != Reloc::PIC_)))) { /// Also try moving call address load from outside callseq_start to just /// before the call to allow it to be folded.
/// @@ -652,10 +665,16 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { AM.JT = J->getIndex(); AM.SymbolFlags = J->getTargetFlags(); - } else { - AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress(); - AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags(); - } + } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) { + X86ISelAddressMode Backup = AM; + AM.BlockAddr = BA->getBlockAddress(); + AM.SymbolFlags = BA->getTargetFlags(); + if (FoldOffsetIntoAddress(BA->getOffset(), AM)) { + AM = Backup; + return true; + } + } else + llvm_unreachable("Unhandled symbol reference node."); if (N.getOpcode() == X86ISD::WrapperRIP) AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64)); @@ -684,10 +703,12 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { AM.JT = J->getIndex(); AM.SymbolFlags = J->getTargetFlags(); - } else { - AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress(); - AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags(); - } + } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) { + AM.BlockAddr = BA->getBlockAddress(); + AM.Disp += BA->getOffset(); + AM.SymbolFlags = BA->getTargetFlags(); + } else + llvm_unreachable("Unhandled symbol reference node."); return false; } @@ -1011,7 +1032,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM.IndexReg = ShVal.getNode()->getOperand(0); ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getNode()->getOperand(1)); - uint64_t Disp = AddVal->getSExtValue() << Val; + uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val; if (!FoldOffsetIntoAddress(Disp, AM)) return false; } @@ -1281,7 +1302,9 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, // that are not a MemSDNode, and thus don't have proper addrspace info. Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores - Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme + Parent->getOpcode() != X86ISD::TLSCALL && // Fixme + Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp + Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace(); // AddrSpace 256 -> GS, 257 -> FS. @@ -1468,6 +1491,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { SDValue In1 = Node->getOperand(1); SDValue In2L = Node->getOperand(2); SDValue In2H = Node->getOperand(3); + SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) return NULL; @@ -1481,159 +1505,13 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { return ResNode; } -// FIXME: Figure out some way to unify this with the 'or' and other code -// below. -SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { - if (Node->hasAnyUseOfValue(0)) - return 0; - - // Optimize common patterns for __sync_add_and_fetch and - // __sync_sub_and_fetch where the result is not used. This allows us - // to use "lock" version of add, sub, inc, dec instructions. - // FIXME: Do not use special instructions but instead add the "lock" - // prefix to the target node somehow. The extra information will then be - // transferred to machine instruction and it denotes the prefix. 
- SDValue Chain = Node->getOperand(0); - SDValue Ptr = Node->getOperand(1); - SDValue Val = Node->getOperand(2); - SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) - return 0; - - bool isInc = false, isDec = false, isSub = false, isCN = false; - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val); - if (CN && CN->getSExtValue() == (int32_t)CN->getSExtValue()) { - isCN = true; - int64_t CNVal = CN->getSExtValue(); - if (CNVal == 1) - isInc = true; - else if (CNVal == -1) - isDec = true; - else if (CNVal >= 0) - Val = CurDAG->getTargetConstant(CNVal, NVT); - else { - isSub = true; - Val = CurDAG->getTargetConstant(-CNVal, NVT); - } - } else if (Val.hasOneUse() && - Val.getOpcode() == ISD::SUB && - X86::isZeroNode(Val.getOperand(0))) { - isSub = true; - Val = Val.getOperand(1); - } - - DebugLoc dl = Node->getDebugLoc(); - unsigned Opc = 0; - switch (NVT.getSimpleVT().SimpleTy) { - default: return 0; - case MVT::i8: - if (isInc) - Opc = X86::LOCK_INC8m; - else if (isDec) - Opc = X86::LOCK_DEC8m; - else if (isSub) { - if (isCN) - Opc = X86::LOCK_SUB8mi; - else - Opc = X86::LOCK_SUB8mr; - } else { - if (isCN) - Opc = X86::LOCK_ADD8mi; - else - Opc = X86::LOCK_ADD8mr; - } - break; - case MVT::i16: - if (isInc) - Opc = X86::LOCK_INC16m; - else if (isDec) - Opc = X86::LOCK_DEC16m; - else if (isSub) { - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_SUB16mi8; - else - Opc = X86::LOCK_SUB16mi; - } else - Opc = X86::LOCK_SUB16mr; - } else { - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_ADD16mi8; - else - Opc = X86::LOCK_ADD16mi; - } else - Opc = X86::LOCK_ADD16mr; - } - break; - case MVT::i32: - if (isInc) - Opc = X86::LOCK_INC32m; - else if (isDec) - Opc = X86::LOCK_DEC32m; - else if (isSub) { - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_SUB32mi8; - else - Opc = X86::LOCK_SUB32mi; - } else - Opc = X86::LOCK_SUB32mr; - } else { - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_ADD32mi8; - else - Opc = X86::LOCK_ADD32mi; - } else - Opc = X86::LOCK_ADD32mr; - } - break; - case MVT::i64: - if (isInc) - Opc = X86::LOCK_INC64m; - else if (isDec) - Opc = X86::LOCK_DEC64m; - else if (isSub) { - Opc = X86::LOCK_SUB64mr; - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_SUB64mi8; - else if (i64immSExt32(Val.getNode())) - Opc = X86::LOCK_SUB64mi32; - } - } else { - Opc = X86::LOCK_ADD64mr; - if (isCN) { - if (immSext8(Val.getNode())) - Opc = X86::LOCK_ADD64mi8; - else if (i64immSExt32(Val.getNode())) - Opc = X86::LOCK_ADD64mi32; - } - } - break; - } - - SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, NVT), 0); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); - if (isInc || isDec) { - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain }; - SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0); - cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); - SDValue RetVals[] = { Undef, Ret }; - return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); - } else { - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; - SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0); - cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); - SDValue RetVals[] = { Undef, Ret }; - return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); - } -} - +/// Atomic opcode table +/// enum AtomicOpc { + ADD, + SUB, + INC, + DEC, OR, AND, XOR, @@ -1657,6 
+1535,58 @@ enum AtomicSz { static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { { + X86::LOCK_ADD8mi, + X86::LOCK_ADD8mr, + X86::LOCK_ADD16mi8, + X86::LOCK_ADD16mi, + X86::LOCK_ADD16mr, + X86::LOCK_ADD32mi8, + X86::LOCK_ADD32mi, + X86::LOCK_ADD32mr, + X86::LOCK_ADD64mi8, + X86::LOCK_ADD64mi32, + X86::LOCK_ADD64mr, + }, + { + X86::LOCK_SUB8mi, + X86::LOCK_SUB8mr, + X86::LOCK_SUB16mi8, + X86::LOCK_SUB16mi, + X86::LOCK_SUB16mr, + X86::LOCK_SUB32mi8, + X86::LOCK_SUB32mi, + X86::LOCK_SUB32mr, + X86::LOCK_SUB64mi8, + X86::LOCK_SUB64mi32, + X86::LOCK_SUB64mr, + }, + { + 0, + X86::LOCK_INC8m, + 0, + 0, + X86::LOCK_INC16m, + 0, + 0, + X86::LOCK_INC32m, + 0, + 0, + X86::LOCK_INC64m, + }, + { + 0, + X86::LOCK_DEC8m, + 0, + 0, + X86::LOCK_DEC16m, + 0, + 0, + X86::LOCK_DEC32m, + 0, + 0, + X86::LOCK_DEC64m, + }, + { X86::LOCK_OR8mi, X86::LOCK_OR8mr, X86::LOCK_OR16mi8, @@ -1667,7 +1597,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { X86::LOCK_OR32mr, X86::LOCK_OR64mi8, X86::LOCK_OR64mi32, - X86::LOCK_OR64mr + X86::LOCK_OR64mr, }, { X86::LOCK_AND8mi, @@ -1680,7 +1610,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { X86::LOCK_AND32mr, X86::LOCK_AND64mi8, X86::LOCK_AND64mi32, - X86::LOCK_AND64mr + X86::LOCK_AND64mr, }, { X86::LOCK_XOR8mi, @@ -1693,18 +1623,74 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { X86::LOCK_XOR32mr, X86::LOCK_XOR64mi8, X86::LOCK_XOR64mi32, - X86::LOCK_XOR64mr + X86::LOCK_XOR64mr, } }; +// Return the target constant operand for atomic-load-op and do simple +// translations, such as from atomic-load-add to lock-sub. The return value is +// one of the following 3 cases: +// + target-constant, the operand could be supported as a target constant. +// + empty, the operand is not needed any more with the new op selected. +// + non-empty, otherwise. +static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG, + DebugLoc dl, + enum AtomicOpc &Op, EVT NVT, + SDValue Val) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) { + int64_t CNVal = CN->getSExtValue(); + // Quit if not 32-bit imm. + if ((int32_t)CNVal != CNVal) + return Val; + // For atomic-load-add, we could do some optimizations. + if (Op == ADD) { + // Translate to INC/DEC if ADD by 1 or -1. + if ((CNVal == 1) || (CNVal == -1)) { + Op = (CNVal == 1) ? INC : DEC; + // No more constant operand after being translated into INC/DEC. + return SDValue(); + } + // Translate to SUB if ADD by negative value. + if (CNVal < 0) { + Op = SUB; + CNVal = -CNVal; + } + } + return CurDAG->getTargetConstant(CNVal, NVT); + } + + // If the value operand is single-used, try to optimize it. + if (Op == ADD && Val.hasOneUse()) { + // Translate (atomic-load-add ptr (sub 0 x)) back to (lock-sub x). + if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) { + Op = SUB; + return Val.getOperand(1); + } + // A special case for i16, which needs truncating as, in most cases, it's + // promoted to i32. 
We will translate + // (atomic-load-add (truncate (sub 0 x))) to (lock-sub (EXTRACT_SUBREG x)) + if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 && + Val.getOperand(0).getOpcode() == ISD::SUB && + X86::isZeroNode(Val.getOperand(0).getOperand(0))) { + Op = SUB; + Val = Val.getOperand(0); + return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT, + Val.getOperand(1)); + } + } + + return Val; +} + SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { if (Node->hasAnyUseOfValue(0)) return 0; + DebugLoc dl = Node->getDebugLoc(); + // Optimize common patterns for __sync_or_and_fetch and similar arith // operations where the result is not used. This allows us to use the "lock" // version of the arithmetic instruction. - // FIXME: Same as for 'add' and 'sub', try to merge those down here. SDValue Chain = Node->getOperand(0); SDValue Ptr = Node->getOperand(1); SDValue Val = Node->getOperand(2); @@ -1715,6 +1701,8 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { // Which index into the table. enum AtomicOpc Op; switch (Node->getOpcode()) { + default: + return 0; case ISD::ATOMIC_LOAD_OR: Op = OR; break; @@ -1724,16 +1712,14 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { case ISD::ATOMIC_LOAD_XOR: Op = XOR; break; - default: - return 0; - } - - bool isCN = false; - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val); - if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) { - isCN = true; - Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT); + case ISD::ATOMIC_LOAD_ADD: + Op = ADD; + break; } + + Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val); + bool isUnOp = !Val.getNode(); + bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant); unsigned Opc = 0; switch (NVT.getSimpleVT().SimpleTy) { @@ -1775,13 +1761,20 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { assert(Opc != 0 && "Invalid arith lock transform!"); - DebugLoc dl = Node->getDebugLoc(); + SDValue Ret; SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; - SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0); + if (isUnOp) { + SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain }; + Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, + array_lengthof(Ops)), 0); + } else { + SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; + Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, + array_lengthof(Ops)), 0); + } cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); SDValue RetVals[] = { Undef, Ret }; return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); @@ -2059,6 +2052,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::ATOMSUB64_DAG: case X86ISD::ATOMNAND64_DAG: case X86ISD::ATOMAND64_DAG: + case X86ISD::ATOMMAX64_DAG: + case X86ISD::ATOMMIN64_DAG: + case X86ISD::ATOMUMAX64_DAG: + case X86ISD::ATOMUMIN64_DAG: case X86ISD::ATOMSWAP64_DAG: { unsigned Opc; switch (Opcode) { @@ -2069,6 +2066,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break; case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break; case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break; + case X86ISD::ATOMMAX64_DAG: Opc = X86::ATOMMAX6432; break; + case 
X86ISD::ATOMMIN64_DAG: Opc = X86::ATOMMIN6432; break; + case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break; + case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break; case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break; } SDNode *RetVal = SelectAtomic64(Node, Opc); @@ -2077,15 +2078,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { break; } - case ISD::ATOMIC_LOAD_ADD: { - SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT); - if (RetVal) - return RetVal; - break; - } case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: { + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_ADD: { SDNode *RetVal = SelectAtomicLoadArith(Node, NVT); if (RetVal) return RetVal; @@ -2116,7 +2112,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Make sure that we don't change the operation by removing bits. // This only matters for OR and XOR, AND is unaffected. - if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val) + uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1; + if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) break; unsigned ShlOp, Op; @@ -2199,13 +2196,16 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue N1 = Node->getOperand(1); bool isSigned = Opcode == ISD::SMUL_LOHI; + bool hasBMI2 = Subtarget->hasBMI2(); if (!isSigned) { switch (NVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; - case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break; - case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break; + case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r; + MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break; + case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r; + MOpc = hasBMI2 ? 
X86::MULX64rm : X86::MUL64m; break; } } else { switch (NVT.getSimpleVT().SimpleTy) { @@ -2217,13 +2217,31 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } } - unsigned LoReg, HiReg; - switch (NVT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unsupported VT!"); - case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break; - case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break; - case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break; - case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break; + unsigned SrcReg, LoReg, HiReg; + switch (Opc) { + default: llvm_unreachable("Unknown MUL opcode!"); + case X86::IMUL8r: + case X86::MUL8r: + SrcReg = LoReg = X86::AL; HiReg = X86::AH; + break; + case X86::IMUL16r: + case X86::MUL16r: + SrcReg = LoReg = X86::AX; HiReg = X86::DX; + break; + case X86::IMUL32r: + case X86::MUL32r: + SrcReg = LoReg = X86::EAX; HiReg = X86::EDX; + break; + case X86::IMUL64r: + case X86::MUL64r: + SrcReg = LoReg = X86::RAX; HiReg = X86::RDX; + break; + case X86::MULX32rr: + SrcReg = X86::EDX; LoReg = HiReg = 0; + break; + case X86::MULX64rr: + SrcReg = X86::RDX; LoReg = HiReg = 0; + break; } SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; @@ -2235,22 +2253,47 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { std::swap(N0, N1); } - SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg, N0, SDValue()).getValue(1); + SDValue ResHi, ResLo; if (foldedLoad) { + SDValue Chain; SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; - SDNode *CNode = - CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops, - array_lengthof(Ops)); - InFlag = SDValue(CNode, 1); + if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) { + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops, + array_lengthof(Ops)); + ResHi = SDValue(CNode, 0); + ResLo = SDValue(CNode, 1); + Chain = SDValue(CNode, 2); + InFlag = SDValue(CNode, 3); + } else { + SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops, + array_lengthof(Ops)); + Chain = SDValue(CNode, 0); + InFlag = SDValue(CNode, 1); + } // Update the chain. - ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); + ReplaceUses(N1.getValue(1), Chain); } else { - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag); - InFlag = SDValue(CNode, 0); + SDValue Ops[] = { N1, InFlag }; + if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) { + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, + array_lengthof(Ops)); + ResHi = SDValue(CNode, 0); + ResLo = SDValue(CNode, 1); + InFlag = SDValue(CNode, 2); + } else { + SDVTList VTs = CurDAG->getVTList(MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, + array_lengthof(Ops)); + InFlag = SDValue(CNode, 0); + } } // Prevent use of AH in a REX instruction by referencing AX instead. @@ -2275,19 +2318,25 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } // Copy the low half of the result, if it is needed. 
if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - LoReg, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); + if (ResLo.getNode() == 0) { + assert(LoReg && "Register for low half is not defined!"); + ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT, + InFlag); + InFlag = ResLo.getValue(2); + } + ReplaceUses(SDValue(Node, 0), ResLo); + DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the high half of the result, if it is needed. if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - HiReg, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); + if (ResHi.getNode() == 0) { + assert(HiReg && "Register for high half is not defined!"); + ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT, + InFlag); + InFlag = ResHi.getValue(2); + } + ReplaceUses(SDValue(Node, 1), ResHi); + DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n'); } return NULL; @@ -2488,7 +2537,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i8, Reg); // Emit a testb. - return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one; do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } // For example, "testl %eax, $2048" to "testb %ah, $8". @@ -2519,8 +2574,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only // target GR8_NOREX registers, so make sure the register class is // forced. - return CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, MVT::i32, - Subreg, ShiftedImm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, + MVT::i32, Subreg, ShiftedImm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one; do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } // For example, "testl %eax, $32776" to "testw %ax, $32776". @@ -2536,7 +2596,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i16, Reg); // Emit a testw. - return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one; do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } // For example, "testq %rax, $268468232" to "testl %eax, $268468232". @@ -2552,7 +2618,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i32, Reg); // Emit a testl. - return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one; do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ?
1 : 0)), + SDValue(NewNode, 0)); + return NULL; } } break; @@ -2607,85 +2679,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return Result; } - - // FIXME: Custom handling because TableGen doesn't support multiple implicit - // defs in an instruction pattern - case X86ISD::PCMPESTRI: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - SDValue N2 = Node->getOperand(2); - SDValue N3 = Node->getOperand(3); - SDValue N4 = Node->getOperand(4); - - // Make sure last argument is a constant - ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N4); - if (!Cst) - break; - - uint64_t Imm = Cst->getZExtValue(); - - SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, - X86::EAX, N1, SDValue()).getValue(1); - InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX, - N3, InFlag).getValue(1); - - SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag }; - unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : - X86::PCMPESTRIrr; - InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, - array_lengthof(Ops)), 0); - - if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::ECX, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - } - if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::EFLAGS, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - } - - return NULL; - } - - // FIXME: Custom handling because TableGen doesn't support multiple implicit - // defs in an instruction pattern - case X86ISD::PCMPISTRI: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - SDValue N2 = Node->getOperand(2); - - // Make sure last argument is a constant - ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N2); - if (!Cst) - break; - - uint64_t Imm = Cst->getZExtValue(); - - SDValue Ops[] = { N0, N1, getI8Imm(Imm) }; - unsigned Opc = Subtarget->hasAVX() ? 
X86::VPCMPISTRIrr : - X86::PCMPISTRIrr; - SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, - array_lengthof(Ops)), 0); - - if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::ECX, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - } - if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::EFLAGS, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - } - - return NULL; - } } SDNode *ResNode = SelectCode(Node); diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index 4af12e4..b35fb51 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -85,7 +85,7 @@ static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128) * ElemsPerChunk); - SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32); + SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal); SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); @@ -118,7 +118,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/128) * ElemsPerChunk); - SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32); + SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx); } @@ -158,10 +158,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) Subtarget = &TM.getSubtarget<X86Subtarget>(); X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf32 = Subtarget->hasSSE1(); - X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; RegInfo = TM.getRegisterInfo(); - TD = getTargetData(); + TD = getDataLayout(); // Set up the TargetLowering object. static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }; @@ -180,7 +179,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setSchedulingPreference(Sched::ILP); else setSchedulingPreference(Sched::RegPressure); - setStackPointerRegisterToSaveRestore(X86StackPtr); + setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister()); + + // Bypass i32 with i8 on Atom when compiling with O2 + if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) + addBypassSlowDiv(32, 8); if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) { // Setup Windows compiler runtime calls. @@ -453,6 +456,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SETCC , MVT::i64 , Custom); } setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); + // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support + // SjLj exception handling but a light-weight setjmp/longjmp replacement to + // support continuations, user-level threading, etc. As a result, no + // other SjLj exception interfaces are implemented; please don't build + // your own exception handling based on them. + // LLVM/Clang supports zero-cost DWARF exception handling. + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); // Darwin ABI issue.
setOperationAction(ISD::ConstantPool , MVT::i32 , Custom); @@ -510,6 +521,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); } if (Subtarget->hasCmpxchg16b()) { @@ -541,6 +556,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); setOperationAction(ISD::TRAP, MVT::Other, Legal); + setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); @@ -737,6 +753,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FFLOOR, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand); @@ -826,6 +843,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); setOperationAction(ISD::FNEG, MVT::v4f32, Custom); + setOperationAction(ISD::FABS, MVT::v4f32, Custom); setOperationAction(ISD::LOAD, MVT::v4f32, Legal); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); @@ -859,6 +877,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FDIV, MVT::v2f64, Legal); setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::FNEG, MVT::v2f64, Custom); + setOperationAction(ISD::FABS, MVT::v2f64, Custom); setOperationAction(ISD::SETCC, MVT::v2i64, Custom); setOperationAction(ISD::SETCC, MVT::v16i8, Custom); @@ -927,6 +946,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + + setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); + // As there is no 64-bit GPR available, we need to build a special custom + // sequence to convert from v2i32 to v2f32. + if (!Subtarget->is64Bit()) + setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom); + + setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); + setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom); + + setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal); } if (Subtarget->hasSSE41()) { @@ -941,6 +972,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FRINT, MVT::f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); + // FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal); @@ -1018,19 +1052,33 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FMUL, MVT::v8f32, Legal); setOperationAction(ISD::FDIV, MVT::v8f32, Legal); setOperationAction(ISD::FSQRT, MVT::v8f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal); setOperationAction(ISD::FNEG, MVT::v8f32, Custom); + setOperationAction(ISD::FABS, MVT::v8f32, Custom); setOperationAction(ISD::FADD, MVT::v4f64, Legal); setOperationAction(ISD::FSUB, MVT::v4f64, Legal); setOperationAction(ISD::FMUL, MVT::v4f64, Legal); setOperationAction(ISD::FDIV, MVT::v4f64, Legal); setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); + setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal); setOperationAction(ISD::FNEG, MVT::v4f64, Custom); + setOperationAction(ISD::FABS, MVT::v4f64, Custom); + + setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom); + + setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom); + + setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, Legal); + setOperationAction(ISD::SRL, MVT::v16i16, Custom); setOperationAction(ISD::SRL, MVT::v32i8, Custom); @@ -1054,7 +1102,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSELECT, MVT::v8i32, Legal); setOperationAction(ISD::VSELECT, MVT::v8f32, Legal); - if (Subtarget->hasFMA()) { + if (Subtarget->hasFMA() || Subtarget->hasFMA4()) { setOperationAction(ISD::FMA, MVT::v8f32, Custom); setOperationAction(ISD::FMA, MVT::v4f64, Custom); setOperationAction(ISD::FMA, MVT::v4f32, Custom); @@ -1219,10 +1267,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::TRUNCATE); - setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::SETCC); - setTargetDAGCombine(ISD::FP_TO_SINT); if (Subtarget->is64Bit()) setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::XOR); @@ -1320,7 +1366,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, // cases like PR2962. This should be removed when PR2962 is fixed. const Function *F = MF.getFunction(); if (IsZeroVal && - !F->hasFnAttr(Attribute::NoImplicitFloat)) { + !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) { if (Size >= 16 && (Subtarget->isUnalignedMemAccessFast() || ((DstAlign == 0 || DstAlign >= 16) && @@ -1988,7 +2034,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, TotalNumIntRegs); - bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat); + bool NoImplicitFloatOps = Fn->getFnAttributes(). + hasAttribute(Attributes::NoImplicitFloat); assert(!(NumXMMRegs && !Subtarget->hasSSE1()) && "SSE register cannot be used when SSE is disabled!"); assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat && @@ -2136,16 +2183,14 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, /// optimization is performed and it is required (FPDiff!=0). 
static SDValue EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, - SDValue Chain, SDValue RetAddrFrIdx, - bool Is64Bit, int FPDiff, DebugLoc dl) { + SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT, + unsigned SlotSize, int FPDiff, DebugLoc dl) { // Store the return address to the appropriate stack slot. if (!FPDiff) return Chain; // Calculate the new stack slot for the return address. - int SlotSize = Is64Bit ? 8 : 4; int NewReturnAddrFI = MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false); - EVT VT = Is64Bit ? MVT::i64 : MVT::i32; - SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); + SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, MachinePointerInfo::getFixedStack(NewReturnAddrFI), false, false, 0); @@ -2180,7 +2225,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, SR != NotStructReturn, - MF.getFunction()->hasStructRetAttr(), + MF.getFunction()->hasStructRetAttr(), CLI.RetTy, Outs, OutVals, Ins, DAG); // Sibcalls are automatically detected tailcalls which do not require @@ -2220,14 +2265,15 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, int FPDiff = 0; if (isTailCall && !IsSibcall) { // Lower arguments at fp - stackoffset + fpdiff. - unsigned NumBytesCallerPushed = - MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn(); + X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>(); + unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn(); + FPDiff = NumBytesCallerPushed - NumBytes; // Set the delta of movement of the returnaddr stackslot. // But only set if delta is greater than previous delta. - if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta())) - MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff); + if (FPDiff < X86Info->getTCReturnAddrDelta()) + X86Info->setTCReturnAddrDelta(FPDiff); } if (!IsSibcall) @@ -2304,7 +2350,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } else if (!IsSibcall && (!isTailCall || isByVal)) { assert(VA.isMemLoc()); if (StackPtr.getNode() == 0) - StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy()); + StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), + getPointerTy()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, dl, DAG, VA, Flags)); } @@ -2392,7 +2439,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Copy relative to framepointer. SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset()); if (StackPtr.getNode() == 0) - StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, + StackPtr = DAG.getCopyFromReg(Chain, dl, + RegInfo->getStackRegister(), getPointerTy()); Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source); @@ -2414,7 +2462,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, &MemOpChains2[0], MemOpChains2.size()); // Store the return address to the appropriate stack slot. 
- Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit, + Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, + getPointerTy(), RegInfo->getSlotSize(), FPDiff, dl); } @@ -2464,7 +2513,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, OpFlags = X86II::MO_DARWIN_STUB; } else if (Subtarget->isPICStyleRIPRel() && isa<Function>(GV) && - cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) { + cast<Function>(GV)->getFnAttributes(). + hasAttribute(Attributes::NonLazyBind)) { // If the function is marked as non-lazy, generate an indirect call // which loads from the GOT directly. This avoids runtime overhead // at the cost of eager binding (and one extra byte of encoding). @@ -2625,7 +2675,7 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, unsigned StackAlignment = TFI.getStackAlignment(); uint64_t AlignMask = StackAlignment - 1; int64_t Offset = StackSize; - uint64_t SlotSize = TD->getPointerSize(); + unsigned SlotSize = RegInfo->getSlotSize(); if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) { // Number smaller than 12 so just add the difference. Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask)); @@ -2700,6 +2750,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, bool isVarArg, bool isCalleeStructRet, bool isCallerStructRet, + Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -2711,6 +2762,13 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // If -tailcallopt is specified, make fastcc functions tail-callable. const MachineFunction &MF = DAG.getMachineFunction(); const Function *CallerF = DAG.getMachineFunction().getFunction(); + + // If the function return type is x86_fp80 and the callee return type is not, + // then the FP_EXTEND of the call result is not a nop. It's not safe to + // perform a tailcall optimization here. + if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty()) + return false; + CallingConv::ID CallerCC = CallerF->getCallingConv(); bool CCMatch = CallerCC == CalleeCC; @@ -2834,7 +2892,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); const X86InstrInfo *TII = - ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); + ((const X86TargetMachine&)getTargetMachine()).getInstrInfo(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[i]; @@ -2985,7 +3043,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { if (ReturnAddrIndex == 0) { // Set up a frame object for the return address. 
- uint64_t SlotSize = TD->getPointerSize(); + unsigned SlotSize = RegInfo->getSlotSize(); ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, false); FuncInfo->setRAIndex(ReturnAddrIndex); @@ -3508,25 +3566,26 @@ SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp, if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i])) MatchOddMask = false; } - static const int CompactionMaskEven[] = {0, 2, -1, -1, 4, 6, -1, -1}; - static const int CompactionMaskOdd [] = {1, 3, -1, -1, 5, 7, -1, -1}; - const int *CompactionMask; - if (MatchEvenMask) - CompactionMask = CompactionMaskEven; - else if (MatchOddMask) - CompactionMask = CompactionMaskOdd; - else + if (!MatchEvenMask && !MatchOddMask) return SDValue(); SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT); - SDValue Op0 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(0), - UndefNode, CompactionMask); - SDValue Op1 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(1), - UndefNode, CompactionMask); - static const int UnpackMask[] = {0, 8, 1, 9, 4, 12, 5, 13}; - return DAG.getVectorShuffle(VT, dl, Op0, Op1, UnpackMask); + SDValue Op0 = SVOp->getOperand(0); + SDValue Op1 = SVOp->getOperand(1); + + if (MatchEvenMask) { + // Shift the second operand right to 32 bits. + static const int ShiftRightMask[] = {-1, 0, -1, 2, -1, 4, -1, 6 }; + Op1 = DAG.getVectorShuffle(VT, dl, Op1, UndefNode, ShiftRightMask); + } else { + // Shift the first operand left to 32 bits. + static const int ShiftLeftMask[] = {1, -1, 3, -1, 5, -1, 7, -1 }; + Op0 = DAG.getVectorShuffle(VT, dl, Op0, UndefNode, ShiftLeftMask); + } + static const int BlendMask[] = {0, 9, 2, 11, 4, 13, 6, 15}; + return DAG.getVectorShuffle(VT, dl, Op0, Op1, BlendMask); } /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand @@ -4577,7 +4636,6 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, MVT ShufVT = V.getValueType().getSimpleVT(); unsigned NumElems = ShufVT.getVectorNumElements(); SmallVector<int, 16> ShuffleMask; - SDValue ImmN; bool IsUnary; if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary)) @@ -4979,6 +5037,18 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, LDBase->getAlignment(), false/*isVolatile*/, true/*ReadMem*/, false/*WriteMem*/); + + // Make sure the newly-created LOAD is in the same position as LDBase in + // terms of dependency. We create a TokenFactor for LDBase and ResNode, and + // update uses of LDBase's output chain to use the TokenFactor. + if (LDBase->hasAnyUseOfValue(1)) { + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + SDValue(LDBase, 1), SDValue(ResNode.getNode(), 1)); + DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain); + DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1), + SDValue(ResNode.getNode(), 1)); + } + return DAG.getNode(ISD::BITCAST, DL, VT, ResNode); } return SDValue(); @@ -4992,7 +5062,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, /// The VBROADCAST node is returned when a pattern is found, /// or SDValue() otherwise. 
 SDValue
-X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
+X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
   if (!Subtarget->hasAVX())
     return SDValue();
 
@@ -5116,80 +5186,78 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
   return SDValue();
 }
 
-// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64
-// and convert it into X86ISD::VFPEXT due to the current ISD::FP_EXTEND has the
-// constraint of matching input/output vector elements.
 SDValue
-X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
-  SDNode *N = Op.getNode();
+X86TargetLowering::buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
-  unsigned NumElts = Op.getNumOperands();
 
-  // Check supported types and sub-targets.
-  //
-  // Only v2f32 -> v2f64 needs special handling.
-  if (VT != MVT::v2f64 || !Subtarget->hasSSE2())
+  // Skip if insert_vec_elt is not supported.
+  if (!isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
     return SDValue();
 
-  SDValue VecIn;
-  EVT VecInVT;
-  SmallVector<int, 8> Mask;
-  EVT SrcVT = MVT::Other;
+  DebugLoc DL = Op.getDebugLoc();
+  unsigned NumElems = Op.getNumOperands();
+
+  SDValue VecIn1;
+  SDValue VecIn2;
+  SmallVector<unsigned, 4> InsertIndices;
+  SmallVector<int, 8> Mask(NumElems, -1);
 
-  // Check the patterns could be translated into X86vfpext.
-  for (unsigned i = 0; i < NumElts; ++i) {
-    SDValue In = N->getOperand(i);
-    unsigned Opcode = In.getOpcode();
+  for (unsigned i = 0; i != NumElems; ++i) {
+    unsigned Opc = Op.getOperand(i).getOpcode();
 
-    // Skip if the element is undefined.
-    if (Opcode == ISD::UNDEF) {
-      Mask.push_back(-1);
+    if (Opc == ISD::UNDEF)
       continue;
-    }
 
-    // Quit if one of the elements is not defined from 'fpext'.
-    if (Opcode != ISD::FP_EXTEND)
-      return SDValue();
+    if (Opc != ISD::EXTRACT_VECTOR_ELT) {
+      // Quit if more than one element needs inserting.
+      if (InsertIndices.size() > 1)
+        return SDValue();
+
+      InsertIndices.push_back(i);
+      continue;
+    }
 
-    // Check how the source of 'fpext' is defined.
-    SDValue L2In = In.getOperand(0);
-    EVT L2InVT = L2In.getValueType();
+    SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
+    SDValue ExtIdx = Op.getOperand(i).getOperand(1);
 
-    // Check the original type
-    if (SrcVT == MVT::Other)
-      SrcVT = L2InVT;
-    else if (SrcVT != L2InVT) // Quit if non-homogenous typed.
+    // Quit if extracted from a vector of a different type.
+    if (ExtractedFromVec.getValueType() != VT)
       return SDValue();
 
-    // Check whether the value being 'fpext'ed is extracted from the same
-    // source.
-    Opcode = L2In.getOpcode();
-
-    // Quit if it's not extracted with a constant index.
-    if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
-        !isa<ConstantSDNode>(L2In.getOperand(1)))
+    // Quit if the index is not a constant.
+    if (!isa<ConstantSDNode>(ExtIdx))
      return SDValue();
 
-    SDValue ExtractedFromVec = L2In.getOperand(0);
+    if (VecIn1.getNode() == 0)
+      VecIn1 = ExtractedFromVec;
+    else if (VecIn1 != ExtractedFromVec) {
+      if (VecIn2.getNode() == 0)
+        VecIn2 = ExtractedFromVec;
+      else if (VecIn2 != ExtractedFromVec)
+        // Quit if more than two vectors to shuffle.
+        return SDValue();
+    }
 
-    if (VecIn.getNode() == 0) {
-      VecIn = ExtractedFromVec;
-      VecInVT = ExtractedFromVec.getValueType();
-    } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec.
-      return SDValue();
+    unsigned Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
 
-    Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue());
+    if (ExtractedFromVec == VecIn1)
+      Mask[i] = Idx;
+    else if (ExtractedFromVec == VecIn2)
+      Mask[i] = Idx + NumElems;
   }
 
-  // Fill the remaining mask as undef.
-  for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i)
-    Mask.push_back(-1);
+  if (VecIn1.getNode() == 0)
+    return SDValue();
+
+  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+  SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, &Mask[0]);
+  for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) {
+    unsigned Idx = InsertIndices[i];
+    NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
+                     DAG.getIntPtrConstant(Idx));
+  }
 
-  return DAG.getNode(X86ISD::VFPEXT, DL, VT,
-                     DAG.getVectorShuffle(VecInVT, DL,
-                                          VecIn, DAG.getUNDEF(VecInVT),
-                                          &Mask[0]));
+  return NV;
 }
 
 SDValue
@@ -5224,10 +5292,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   if (Broadcast.getNode())
     return Broadcast;
 
-  SDValue FpExt = LowerVectorFpExtend(Op, DAG);
-  if (FpExt.getNode())
-    return FpExt;
-
   unsigned EVTBits = ExtVT.getSizeInBits();
 
   unsigned NumZero  = 0;
@@ -5472,6 +5536,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     if (LD.getNode())
       return LD;
 
+    // Check for a build vector built mostly from a shuffle plus a few inserts.
+    SDValue Sh = buildFromShuffleMostly(Op, DAG);
+    if (Sh.getNode())
+      return Sh;
+
     // For SSE 4.1, use insertps to put the high elements into the low element.
     if (getSubtarget()->hasSSE41()) {
       SDValue Result;
@@ -5538,8 +5607,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
   return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
 }
 
-SDValue
-X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
   assert(Op.getNumOperands() == 2);
 
   // 256-bit AVX can use the vinsertf128 instruction to create 256-bit vectors
@@ -5548,9 +5616,9 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
 }
 
 // Try to lower a shuffle node into a simple blend instruction.
-static SDValue LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
-                                          const X86Subtarget *Subtarget,
-                                          SelectionDAG &DAG) {
+static SDValue
+LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
+                           const X86Subtarget *Subtarget, SelectionDAG &DAG) {
   SDValue V1 = SVOp->getOperand(0);
   SDValue V2 = SVOp->getOperand(1);
   DebugLoc dl = SVOp->getDebugLoc();
@@ -5620,9 +5688,9 @@ static SDValue LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
 // 2. [ssse3] 1 x pshufb
 // 3. [ssse3] 2 x pshufb + 1 x por
 // 4. [all]   mov + pshuflw + pshufhw + N x (pextrw + pinsrw)
-SDValue
-X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
-                                            SelectionDAG &DAG) const {
+static SDValue
+LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
+                         SelectionDAG &DAG) {
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
   SDValue V1 = SVOp->getOperand(0);
   SDValue V2 = SVOp->getOperand(1);
@@ -5879,8 +5947,6 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
   DebugLoc dl = SVOp->getDebugLoc();
   ArrayRef<int> MaskVals = SVOp->getMask();
 
-  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
-
   // If we have SSSE3, case 1 is generated when all result bytes come from
   // one of the inputs.  Otherwise, case 2 is generated.  If no SSSE3 is
   // present, fall back to case 3.
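The buildFromShuffleMostly hunk above turns a BUILD_VECTOR whose operands are mostly EXTRACT_VECTOR_ELTs of at most two source vectors into one shuffle followed by at most one INSERT_VECTOR_ELT. The sketch below is a minimal standalone model of the mask bookkeeping only; the 4-lane width, the arrays, and every name in it are invented for illustration (the real code builds SelectionDAG nodes, not arrays):

    // Hypothetical model of the shuffle-mask / insert-index split.
    #include <cstdio>
    #include <vector>

    int main() {
      const int NumElems = 4;
      int VecIn1[NumElems] = {10, 20, 30, 40}; // the one source vector
      int Scalar = 99;                         // the lone non-extract element

      // Element i of the BUILD_VECTOR is extract(VecIn1, ElemSrc[i]),
      // or the loose scalar when ElemSrc[i] is -1.
      int ElemSrc[NumElems] = {2, 0, -1, 3};

      int Mask[NumElems];
      std::vector<int> InsertIndices;
      for (int i = 0; i != NumElems; ++i) {
        if (ElemSrc[i] < 0) {
          Mask[i] = -1;               // undef lane, patched by the insert below
          InsertIndices.push_back(i);
        } else {
          Mask[i] = ElemSrc[i];
        }
      }

      // One shuffle; undef lanes are modeled as 0 here.
      int NV[NumElems];
      for (int i = 0; i != NumElems; ++i)
        NV[i] = Mask[i] < 0 ? 0 : VecIn1[Mask[i]];

      // At most one INSERT_VECTOR_ELT for the leftover scalar.
      for (int Idx : InsertIndices)
        NV[Idx] = Scalar;

      for (int i = 0; i != NumElems; ++i)
        std::printf("%d ", NV[i]); // prints: 30 10 99 40
      std::printf("\n");
    }

Capping InsertIndices at one element keeps the fallback cheap: one shuffle plus one insert is still a win, while rebuilding many lanes by insertion would not be.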
@@ -5904,7 +5970,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
     V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
                      DAG.getNode(ISD::BUILD_VECTOR, dl,
                                  MVT::v16i8, &pshufbMask[0], 16));
-    if (V2IsUndef)
+
+    // As PSHUFB will zero elements with negative indices, it's safe to ignore
+    // the 2nd operand if it's undefined or zero.
+    if (V2.getOpcode() == ISD::UNDEF ||
+        ISD::isBuildVectorAllZeros(V2.getNode()))
       return V1;
 
     // Calculate the shuffle mask for the second input, shuffle it, and
@@ -5990,6 +6060,51 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
   return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV);
 }
 
+// v32i8 shuffles - Translate to VPSHUFB if possible.
+static
+SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
+                                 const X86Subtarget *Subtarget,
+                                 SelectionDAG &DAG) {
+  EVT VT = SVOp->getValueType(0);
+  SDValue V1 = SVOp->getOperand(0);
+  SDValue V2 = SVOp->getOperand(1);
+  DebugLoc dl = SVOp->getDebugLoc();
+  SmallVector<int, 32> MaskVals(SVOp->getMask().begin(), SVOp->getMask().end());
+
+  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+  bool V1IsAllZero = ISD::isBuildVectorAllZeros(V1.getNode());
+  bool V2IsAllZero = ISD::isBuildVectorAllZeros(V2.getNode());
+
+  // VPSHUFB may be generated if
+  // (1) one of the input vectors is undefined or a zeroinitializer.
+  // The mask value 0x80 puts 0 in the corresponding slot of the vector.
+  // And (2) the mask indices don't cross the 128-bit lane.
+  if (VT != MVT::v32i8 || !Subtarget->hasAVX2() ||
+      (!V2IsUndef && !V2IsAllZero && !V1IsAllZero))
+    return SDValue();
+
+  if (V1IsAllZero && !V2IsAllZero) {
+    CommuteVectorShuffleMask(MaskVals, 32);
+    V1 = V2;
+  }
+  SmallVector<SDValue, 32> pshufbMask;
+  for (unsigned i = 0; i != 32; i++) {
+    int EltIdx = MaskVals[i];
+    if (EltIdx < 0 || EltIdx >= 32)
+      EltIdx = 0x80;
+    else {
+      if ((EltIdx >= 16 && i < 16) || (EltIdx < 16 && i >= 16))
+        // Crossing lanes is not allowed.
+        return SDValue();
+      EltIdx &= 0xf;
+    }
+    pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
+  }
+  return DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, V1,
+                     DAG.getNode(ISD::BUILD_VECTOR, dl,
+                                 MVT::v32i8, &pshufbMask[0], 32));
+}
+
 /// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
 /// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
 /// done when every pair / quad of shuffle mask elements point to elements in
@@ -6324,17 +6439,17 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
 }
 
 static bool MayFoldVectorLoad(SDValue V) {
-  if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
+  while (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
     V = V.getOperand(0);
+
   if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
     V = V.getOperand(0);
   if (V.hasOneUse() && V.getOpcode() == ISD::BUILD_VECTOR &&
      V.getNumOperands() == 2 && V.getOperand(1).getOpcode() == ISD::UNDEF)
     // BUILD_VECTOR (load), undef
     V = V.getOperand(0);
-  if (MayFoldLoad(V))
-    return true;
-  return false;
+
+  return MayFoldLoad(V);
 }
 
 // FIXME: the version above should always be used. Since there's
@@ -6457,6 +6572,81 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
                               getShuffleSHUFImmediate(SVOp), DAG);
 }
 
+// Reduce a vector shuffle to zext.
+SDValue
+X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
+  // PMOVZX is only available from SSE4.1.
+  if (!Subtarget->hasSSE41())
+    return SDValue();
+
+  EVT VT = Op.getValueType();
+
+  // Only AVX2 supports 256-bit vector integer extension.
+  if (!Subtarget->hasAVX2() && VT.is256BitVector())
+    return SDValue();
+
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  unsigned NumElems = VT.getVectorNumElements();
+
+  // Extending is a unary operation, and the element type of the source vector
+  // won't be equal to or larger than i64.
+  if (V2.getOpcode() != ISD::UNDEF || !VT.isInteger() ||
+      VT.getVectorElementType() == MVT::i64)
+    return SDValue();
+
+  // Find the expansion ratio, e.g. expanding from i8 to i32 has a ratio of 4.
+  unsigned Shift = 1; // Start from 2, i.e. 1 << 1.
+  while ((1U << Shift) < NumElems) {
+    if (SVOp->getMaskElt(1U << Shift) == 1)
+      break;
+    Shift += 1;
+    // The maximum ratio is 8, i.e. from i8 to i64.
+    if (Shift > 3)
+      return SDValue();
+  }
+
+  // Check the shuffle mask.
+  unsigned Mask = (1U << Shift) - 1;
+  for (unsigned i = 0; i != NumElems; ++i) {
+    int EltIdx = SVOp->getMaskElt(i);
+    if ((i & Mask) != 0 && EltIdx != -1)
+      return SDValue();
+    if ((i & Mask) == 0 && (unsigned)EltIdx != (i >> Shift))
+      return SDValue();
+  }
+
+  unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift;
+  EVT NeVT = EVT::getIntegerVT(*DAG.getContext(), NBits);
+  EVT NVT = EVT::getVectorVT(*DAG.getContext(), NeVT, NumElems >> Shift);
+
+  if (!isTypeLegal(NVT))
+    return SDValue();
+
+  // Simplify the operand before it is fed into the shuffle.
+  unsigned SignificantBits = NVT.getSizeInBits() >> Shift;
+  if (V1.getOpcode() == ISD::BITCAST &&
+      V1.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+      V1.getOperand(0).getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+      V1.getOperand(0)
+        .getOperand(0).getValueType().getSizeInBits() == SignificantBits) {
+    // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
+    SDValue V = V1.getOperand(0).getOperand(0).getOperand(0);
+    ConstantSDNode *CIdx =
+      dyn_cast<ConstantSDNode>(V1.getOperand(0).getOperand(0).getOperand(1));
+    // If it's foldable, i.e. a normal load with a single use, let instruction
+    // selection fold it. Otherwise, shorten the conversion sequence here.
+    if (CIdx && CIdx->getZExtValue() == 0 &&
+        (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse()))
+      V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V);
+  }
+
+  return DAG.getNode(ISD::BITCAST, DL, VT,
+                     DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
+}
+
 SDValue
 X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
@@ -6487,6 +6677,11 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
     return PromoteSplat(SVOp, DAG);
   }
 
+  // Check for integer expanding shuffles.
+  SDValue NewOp = lowerVectorIntExtend(Op, DAG);
+  if (NewOp.getNode())
+    return NewOp;
+
   // If the shuffle can be profitably rewritten as a narrower shuffle, then
   // do it!
   if (VT == MVT::v8i16 || VT == MVT::v16i8 ||
@@ -6536,7 +6731,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   bool HasAVX    = Subtarget->hasAVX();
   bool HasAVX2   = Subtarget->hasAVX2();
   MachineFunction &MF = DAG.getMachineFunction();
-  bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+  bool OptForSize = MF.getFunction()->getFnAttributes().
+ hasAttribute(Attributes::OptimizeForSize); assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles"); @@ -6805,7 +7001,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // Handle v8i16 specifically since SSE can do byte extraction and insertion. if (VT == MVT::v8i16) { - SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, DAG); + SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, Subtarget, DAG); if (NewOp.getNode()) return NewOp; } @@ -6816,6 +7012,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return NewOp; } + if (VT == MVT::v32i8) { + SDValue NewOp = LowerVECTOR_SHUFFLEv32i8(SVOp, Subtarget, DAG); + if (NewOp.getNode()) + return NewOp; + } + // Handle all 128-bit wide vectors with 4 elements, and match them with // several different shuffle types. if (NumElems == 4 && VT.is128BitVector()) @@ -6839,9 +7041,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, if (VT.getSizeInBits() == 8) { SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, - Op.getOperand(0), Op.getOperand(1)); + Op.getOperand(0), Op.getOperand(1)); SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract, - DAG.getValueType(VT)); + DAG.getValueType(VT)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert); } @@ -6856,9 +7058,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, Op.getOperand(0)), Op.getOperand(1))); SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32, - Op.getOperand(0), Op.getOperand(1)); + Op.getOperand(0), Op.getOperand(1)); SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract, - DAG.getValueType(VT)); + DAG.getValueType(VT)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert); } @@ -6942,9 +7144,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // Transform it so it match pextrw which produces a 32-bit result. EVT EltVT = MVT::i32; SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT, - Op.getOperand(0), Op.getOperand(1)); + Op.getOperand(0), Op.getOperand(1)); SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract, - DAG.getValueType(VT)); + DAG.getValueType(VT)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert); } @@ -7087,8 +7289,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -SDValue -X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); EVT OpVT = Op.getValueType(); @@ -7120,8 +7321,8 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { // Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in // a simple subregister reference or explicit instructions to grab // upper bits of a vector. -SDValue -X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { if (Subtarget->hasAVX()) { DebugLoc dl = Op.getNode()->getDebugLoc(); SDValue Vec = Op.getNode()->getOperand(0); @@ -7140,8 +7341,8 @@ X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { // Lower a node with an INSERT_SUBVECTOR opcode. This may result in a // simple superregister reference or explicit instructions to insert // the upper bits of a vector. 
-SDValue -X86TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { if (Subtarget->hasAVX()) { DebugLoc dl = Op.getNode()->getDebugLoc(); SDValue Vec = Op.getNode()->getOperand(0); @@ -7284,9 +7485,10 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { Subtarget->ClassifyBlockAddressReference(); CodeModel::Model M = getTargetMachine().getCodeModel(); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset(); DebugLoc dl = Op.getDebugLoc(); - SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), - /*isTarget=*/true, OpFlags); + SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy(), Offset, + OpFlags); if (Subtarget->isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) @@ -7395,8 +7597,8 @@ LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, SDValue InFlag; DebugLoc dl = GA->getDebugLoc(); // ? function entry point might be better SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, - DAG.getNode(X86ISD::GlobalBaseReg, - DebugLoc(), PtrVT), InFlag); + DAG.getNode(X86ISD::GlobalBaseReg, + DebugLoc(), PtrVT), InFlag); InFlag = Chain.getValue(1); return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD); @@ -7897,11 +8099,29 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, return Sub; } +SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op, + SelectionDAG &DAG) const { + SDValue N0 = Op.getOperand(0); + EVT SVT = N0.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + + assert((SVT == MVT::v4i8 || SVT == MVT::v4i16 || + SVT == MVT::v8i8 || SVT == MVT::v8i16) && + "Custom UINT_TO_FP is not supported!"); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, SVT.getVectorNumElements()); + return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), + DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0)); +} + SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDValue N0 = Op.getOperand(0); DebugLoc dl = Op.getDebugLoc(); + if (Op.getValueType().isVector()) + return lowerUINT_TO_FP_vec(Op, DAG); + // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform // the optimization here. @@ -8075,10 +8295,66 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) co } } +SDValue X86TargetLowering::lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + SDValue In = Op.getOperand(0); + EVT SVT = In.getValueType(); + + if (!VT.is256BitVector() || !SVT.is128BitVector() || + VT.getVectorNumElements() != SVT.getVectorNumElements()) + return SDValue(); + + assert(Subtarget->hasAVX() && "256-bit vector is observed without AVX!"); + + // AVX2 has better support of integer extending. 
+ if (Subtarget->hasAVX2()) + return DAG.getNode(X86ISD::VZEXT, DL, VT, In); + + SDValue Lo = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, In); + static const int Mask[] = {4, 5, 6, 7, -1, -1, -1, -1}; + SDValue Hi = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, + DAG.getVectorShuffle(MVT::v8i16, DL, In, DAG.getUNDEF(MVT::v8i16), &Mask[0])); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi); +} + +SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + EVT SVT = Op.getOperand(0).getValueType(); + + if (!VT.is128BitVector() || !SVT.is256BitVector() || + VT.getVectorNumElements() != SVT.getVectorNumElements()) + return SDValue(); + + assert(Subtarget->hasAVX() && "256-bit vector is observed without AVX!"); + + unsigned NumElems = VT.getVectorNumElements(); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), + NumElems * 2); + + SDValue In = Op.getOperand(0); + SmallVector<int, 16> MaskVec(NumElems * 2, -1); + // Prepare truncation shuffle mask + for (unsigned i = 0; i != NumElems; ++i) + MaskVec[i] = i * 2; + SDValue V = DAG.getVectorShuffle(NVT, DL, + DAG.getNode(ISD::BITCAST, DL, NVT, In), + DAG.getUNDEF(NVT), &MaskVec[0]); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, + DAG.getIntPtrConstant(0)); +} + SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { - if (Op.getValueType().isVector()) + if (Op.getValueType().isVector()) { + if (Op.getValueType() == MVT::v8i16) + return DAG.getNode(ISD::TRUNCATE, Op.getDebugLoc(), Op.getValueType(), + DAG.getNode(ISD::FP_TO_SINT, Op.getDebugLoc(), + MVT::v8i32, Op.getOperand(0))); return SDValue(); + } std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, /*IsSigned=*/ true, /*IsReplace=*/ false); @@ -8113,26 +8389,49 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, return FIST; } -SDValue X86TargetLowering::LowerFABS(SDValue Op, - SelectionDAG &DAG) const { +SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + SDValue In = Op.getOperand(0); + EVT SVT = In.getValueType(); + + assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); + + return DAG.getNode(X86ISD::VFPEXT, DL, VT, + DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, + In, DAG.getUNDEF(SVT))); +} + +SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); EVT EltVT = VT; - if (VT.isVector()) + unsigned NumElts = VT == MVT::f64 ? 
2 : 4;
+  if (VT.isVector()) {
     EltVT = VT.getVectorElementType();
-  Constant *C;
-  if (EltVT == MVT::f64) {
-    C = ConstantVector::getSplat(2,
-        ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
-  } else {
-    C = ConstantVector::getSplat(4,
-        ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))));
+    NumElts = VT.getVectorNumElements();
   }
-  SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+  Constant *C;
+  if (EltVT == MVT::f64)
+    C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
+  else
+    C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
+  C = ConstantVector::getSplat(NumElts, C);
+  SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
+  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
   SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
                              MachinePointerInfo::getConstantPool(),
-                             false, false, false, 16);
+                             false, false, false, Alignment);
+  if (VT.isVector()) {
+    MVT ANDVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
+    return DAG.getNode(ISD::BITCAST, dl, VT,
+                       DAG.getNode(ISD::AND, dl, ANDVT,
+                                   DAG.getNode(ISD::BITCAST, dl, ANDVT,
+                                               Op.getOperand(0)),
+                                   DAG.getNode(ISD::BITCAST, dl, ANDVT, Mask)));
+  }
   return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
 }
 
@@ -8152,10 +8451,11 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
   else
     C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
   C = ConstantVector::getSplat(NumElts, C);
-  SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+  SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
+  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
   SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
                              MachinePointerInfo::getConstantPool(),
-                             false, false, false, 16);
+                             false, false, false, Alignment);
   if (VT.isVector()) {
     MVT XORVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
     return DAG.getNode(ISD::BITCAST, dl, VT,
@@ -8241,7 +8541,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit);
 }
 
-SDValue X86TargetLowering::LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
   SDValue N0 = Op.getOperand(0);
   DebugLoc dl = Op.getDebugLoc();
   EVT VT = Op.getValueType();
@@ -8252,6 +8552,98 @@
   return DAG.getNode(ISD::AND, dl, VT, xFGETSIGN, DAG.getConstant(1, VT));
 }
 
+// LowerVectorAllZeroTest - Check whether an OR'd tree is PTEST-able.
+//
+SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const {
+  assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
+
+  if (!Subtarget->hasSSE41())
+    return SDValue();
+
+  if (!Op->hasOneUse())
+    return SDValue();
+
+  SDNode *N = Op.getNode();
+  DebugLoc DL = N->getDebugLoc();
+
+  SmallVector<SDValue, 8> Opnds;
+  DenseMap<SDValue, unsigned> VecInMap;
+  EVT VT = MVT::Other;
+
+  // Recognize a special case where a vector is cast into a wide integer to
+  // test all 0s.
+  Opnds.push_back(N->getOperand(0));
+  Opnds.push_back(N->getOperand(1));
+
+  for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
+    SmallVector<SDValue, 8>::const_iterator I = Opnds.begin() + Slot;
+    // BFS-traverse all OR'd operands.
+    if (I->getOpcode() == ISD::OR) {
+      Opnds.push_back(I->getOperand(0));
+      Opnds.push_back(I->getOperand(1));
+      // Re-evaluate the number of nodes to be traversed.
+      e += 2; // 2 more nodes (LHS and RHS) are pushed.
+      continue;
+    }
+
+    // Quit if not an EXTRACT_VECTOR_ELT.
+    if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+      return SDValue();
+
+    // Quit if the index is not a constant.
+    SDValue Idx = I->getOperand(1);
+    if (!isa<ConstantSDNode>(Idx))
+      return SDValue();
+
+    SDValue ExtractedFromVec = I->getOperand(0);
+    DenseMap<SDValue, unsigned>::iterator M = VecInMap.find(ExtractedFromVec);
+    if (M == VecInMap.end()) {
+      VT = ExtractedFromVec.getValueType();
+      // Quit if not 128/256-bit vector.
+      if (!VT.is128BitVector() && !VT.is256BitVector())
+        return SDValue();
+      // Quit if not the same type.
+      if (VecInMap.begin() != VecInMap.end() &&
+          VT != VecInMap.begin()->first.getValueType())
+        return SDValue();
+      M = VecInMap.insert(std::make_pair(ExtractedFromVec, 0)).first;
+    }
+    M->second |= 1U << cast<ConstantSDNode>(Idx)->getZExtValue();
+  }
+
+  assert((VT.is128BitVector() || VT.is256BitVector()) &&
+         "Not extracted from 128-/256-bit vector.");
+
+  unsigned FullMask = (1U << VT.getVectorNumElements()) - 1U;
+  SmallVector<SDValue, 8> VecIns;
+
+  for (DenseMap<SDValue, unsigned>::const_iterator
+        I = VecInMap.begin(), E = VecInMap.end(); I != E; ++I) {
+    // Quit if not all elements are used.
+    if (I->second != FullMask)
+      return SDValue();
+    VecIns.push_back(I->first);
+  }
+
+  EVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
+
+  // Cast all vectors into TestVT for PTEST.
+  for (unsigned i = 0, e = VecIns.size(); i < e; ++i)
+    VecIns[i] = DAG.getNode(ISD::BITCAST, DL, TestVT, VecIns[i]);
+
+  // If more than one full vector is evaluated, OR them first before the PTEST.
+  for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; Slot += 2, e += 1) {
+    // Each iteration will OR 2 nodes and append the result until there is only
+    // 1 node left, i.e. the final OR'd value of all vectors.
+    SDValue LHS = VecIns[Slot];
+    SDValue RHS = VecIns[Slot + 1];
+    VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS));
+  }
+
+  return DAG.getNode(X86ISD::PTEST, DL, MVT::i32,
+                     VecIns.back(), VecIns.back());
+}
+
 /// Emit nodes that will be selected as "test Op0,Op0", or something
 /// equivalent.
 SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
@@ -8285,7 +8677,33 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
   unsigned Opcode = 0;
   unsigned NumOperands = 0;
-  switch (Op.getNode()->getOpcode()) {
+
+  // Truncate operations may prevent the merge of the SETCC instruction
+  // and the arithmetic instruction before it. Attempt to truncate the operands
+  // of the arithmetic instruction and use a reduced bit-width instruction.
+  bool NeedTruncation = false;
+  SDValue ArithOp = Op;
+  if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) {
+    SDValue Arith = Op->getOperand(0);
+    // Both the trunc and the arithmetic op need to have one user each.
+    if (Arith->hasOneUse())
+      switch (Arith.getOpcode()) {
+        default: break;
+        case ISD::ADD:
+        case ISD::SUB:
+        case ISD::AND:
+        case ISD::OR:
+        case ISD::XOR: {
+          NeedTruncation = true;
+          ArithOp = Arith;
+        }
+      }
+  }
+
+  // NOTICE: In the code below we use ArithOp to hold the arithmetic operation
+  // which may be the result of a CAST. We use the variable 'Op', which is the
+  // non-cast value, when we check for possible users.
+  switch (ArithOp.getOpcode()) {
   case ISD::ADD:
     // Due to an isel shortcoming, be conservative if this add is likely to be
    // selected as part of a load-modify-store instruction.
When the root node @@ -8305,7 +8723,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, goto default_case; if (ConstantSDNode *C = - dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) { + dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) { // An add of one will be selected as an INC. if (C->getAPIntValue() == 1) { Opcode = X86ISD::INC; @@ -8341,7 +8759,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, if (User->getOpcode() != ISD::BRCOND && User->getOpcode() != ISD::SETCC && - (User->getOpcode() != ISD::SELECT || UOpNo != 0)) { + !(User->getOpcode() == ISD::SELECT && UOpNo == 0)) { NonFlagUse = true; break; } @@ -8362,14 +8780,20 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, goto default_case; // Otherwise use a regular EFLAGS-setting instruction. - switch (Op.getNode()->getOpcode()) { + switch (ArithOp.getOpcode()) { default: llvm_unreachable("unexpected operator!"); - case ISD::SUB: - Opcode = X86ISD::SUB; - break; - case ISD::OR: Opcode = X86ISD::OR; break; + case ISD::SUB: Opcode = X86ISD::SUB; break; case ISD::XOR: Opcode = X86ISD::XOR; break; case ISD::AND: Opcode = X86ISD::AND; break; + case ISD::OR: { + if (!NeedTruncation && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) { + SDValue EFLAGS = LowerVectorAllZeroTest(Op, DAG); + if (EFLAGS.getNode()) + return EFLAGS; + } + Opcode = X86ISD::OR; + break; + } } NumOperands = 2; @@ -8387,19 +8811,40 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, break; } + // If we found that truncation is beneficial, perform the truncation and + // update 'Op'. + if (NeedTruncation) { + EVT VT = Op.getValueType(); + SDValue WideVal = Op->getOperand(0); + EVT WideVT = WideVal.getValueType(); + unsigned ConvertedOp = 0; + // Use a target machine opcode to prevent further DAGCombine + // optimizations that may separate the arithmetic operations + // from the setcc node. + switch (WideVal.getOpcode()) { + default: break; + case ISD::ADD: ConvertedOp = X86ISD::ADD; break; + case ISD::SUB: ConvertedOp = X86ISD::SUB; break; + case ISD::AND: ConvertedOp = X86ISD::AND; break; + case ISD::OR: ConvertedOp = X86ISD::OR; break; + case ISD::XOR: ConvertedOp = X86ISD::XOR; break; + } + + if (ConvertedOp) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) { + SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0)); + SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1)); + Op = DAG.getNode(ConvertedOp, dl, VT, V0, V1); + } + } + } + if (Opcode == 0) // Emit a CMP with 0, which is the TEST pattern. return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, DAG.getConstant(0, Op.getValueType())); - if (Opcode == X86ISD::CMP) { - SDValue New = DAG.getNode(Opcode, dl, MVT::i32, Op.getOperand(0), - Op.getOperand(1)); - // We can't replace usage of SUB with CMP. - // The SUB node will be removed later because there is no use of it. - return SDValue(New.getNode(), 0); - } - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); SmallVector<SDValue, 4> Ops; for (unsigned i = 0; i != NumOperands; ++i) @@ -8958,6 +9403,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } } + // X86 doesn't have an i8 cmov. If both operands are the result of a truncate + // widen the cmov and push the truncate through. This avoids introducing a new + // branch during isel and doesn't add any extensions. 
+ if (Op.getValueType() == MVT::i8 && + Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) { + SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0); + if (T1.getValueType() == T2.getValueType() && + // Blacklist CopyFromReg to avoid partial register stalls. + T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){ + SDVTList VTs = DAG.getVTList(T1.getValueType(), MVT::Glue); + SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VTs, T2, T1, CC, Cond); + return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov); + } + } + // X86ISD::CMOV means set the result (which is operand 1) to the RHS if // condition is true. SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); @@ -9312,7 +9772,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag); Flag = Chain.getValue(1); - Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1); + Chain = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), + SPTy).getValue(1); SDValue Ops1[2] = { Chain.getValue(0), Chain }; return DAG.getMergeValues(Ops1, 2, dl); @@ -9395,7 +9856,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { EVT ArgVT = Op.getNode()->getValueType(0); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy); + uint32_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy); uint8_t ArgMode; // Decide which area this value should be read from. @@ -9415,7 +9876,8 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { // Sanity Check: Make sure using fp_offset makes sense. assert(!getTargetMachine().Options.UseSoftFloat && !(DAG.getMachineFunction() - .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) && + .getFunction()->getFnAttributes() + .hasAttribute(Attributes::NoImplicitFloat)) && Subtarget->hasSSE1()); } @@ -9446,7 +9908,8 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { false, false, false, 0); } -SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { // X86-64 va_list is a struct { i32, i32, i8*, i8* }. assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!"); SDValue Chain = Op.getOperand(0); @@ -9507,8 +9970,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT, return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); } -SDValue -X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); switch (IntNo) { @@ -9896,62 +10358,6 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const Op.getOperand(1), Op.getOperand(2), DAG); } - // Fix vector shift instructions where the last operand is a non-immediate - // i32 value. - case Intrinsic::x86_mmx_pslli_w: - case Intrinsic::x86_mmx_pslli_d: - case Intrinsic::x86_mmx_pslli_q: - case Intrinsic::x86_mmx_psrli_w: - case Intrinsic::x86_mmx_psrli_d: - case Intrinsic::x86_mmx_psrli_q: - case Intrinsic::x86_mmx_psrai_w: - case Intrinsic::x86_mmx_psrai_d: { - SDValue ShAmt = Op.getOperand(2); - if (isa<ConstantSDNode>(ShAmt)) - return SDValue(); - - unsigned NewIntNo; - switch (IntNo) { - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
- case Intrinsic::x86_mmx_pslli_w: - NewIntNo = Intrinsic::x86_mmx_psll_w; - break; - case Intrinsic::x86_mmx_pslli_d: - NewIntNo = Intrinsic::x86_mmx_psll_d; - break; - case Intrinsic::x86_mmx_pslli_q: - NewIntNo = Intrinsic::x86_mmx_psll_q; - break; - case Intrinsic::x86_mmx_psrli_w: - NewIntNo = Intrinsic::x86_mmx_psrl_w; - break; - case Intrinsic::x86_mmx_psrli_d: - NewIntNo = Intrinsic::x86_mmx_psrl_d; - break; - case Intrinsic::x86_mmx_psrli_q: - NewIntNo = Intrinsic::x86_mmx_psrl_q; - break; - case Intrinsic::x86_mmx_psrai_w: - NewIntNo = Intrinsic::x86_mmx_psra_w; - break; - case Intrinsic::x86_mmx_psrai_d: - NewIntNo = Intrinsic::x86_mmx_psra_d; - break; - } - - // The vector shift intrinsics with scalars uses 32b shift amounts but - // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits - // to be zero. - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, ShAmt, - DAG.getConstant(0, MVT::i32)); -// FIXME this must be lowered to get rid of the invalid type. - - EVT VT = Op.getValueType(); - ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt); - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(NewIntNo, MVT::i32), - Op.getOperand(1), ShAmt); - } case Intrinsic::x86_sse42_pcmpistria128: case Intrinsic::x86_sse42_pcmpestria128: case Intrinsic::x86_sse42_pcmpistric128: @@ -10030,11 +10436,78 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size()); } + case Intrinsic::x86_fma_vfmadd_ps: + case Intrinsic::x86_fma_vfmadd_pd: + case Intrinsic::x86_fma_vfmsub_ps: + case Intrinsic::x86_fma_vfmsub_pd: + case Intrinsic::x86_fma_vfnmadd_ps: + case Intrinsic::x86_fma_vfnmadd_pd: + case Intrinsic::x86_fma_vfnmsub_ps: + case Intrinsic::x86_fma_vfnmsub_pd: + case Intrinsic::x86_fma_vfmaddsub_ps: + case Intrinsic::x86_fma_vfmaddsub_pd: + case Intrinsic::x86_fma_vfmsubadd_ps: + case Intrinsic::x86_fma_vfmsubadd_pd: + case Intrinsic::x86_fma_vfmadd_ps_256: + case Intrinsic::x86_fma_vfmadd_pd_256: + case Intrinsic::x86_fma_vfmsub_ps_256: + case Intrinsic::x86_fma_vfmsub_pd_256: + case Intrinsic::x86_fma_vfnmadd_ps_256: + case Intrinsic::x86_fma_vfnmadd_pd_256: + case Intrinsic::x86_fma_vfnmsub_ps_256: + case Intrinsic::x86_fma_vfnmsub_pd_256: + case Intrinsic::x86_fma_vfmaddsub_ps_256: + case Intrinsic::x86_fma_vfmaddsub_pd_256: + case Intrinsic::x86_fma_vfmsubadd_ps_256: + case Intrinsic::x86_fma_vfmsubadd_pd_256: { + unsigned Opc; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
+ case Intrinsic::x86_fma_vfmadd_ps: + case Intrinsic::x86_fma_vfmadd_pd: + case Intrinsic::x86_fma_vfmadd_ps_256: + case Intrinsic::x86_fma_vfmadd_pd_256: + Opc = X86ISD::FMADD; + break; + case Intrinsic::x86_fma_vfmsub_ps: + case Intrinsic::x86_fma_vfmsub_pd: + case Intrinsic::x86_fma_vfmsub_ps_256: + case Intrinsic::x86_fma_vfmsub_pd_256: + Opc = X86ISD::FMSUB; + break; + case Intrinsic::x86_fma_vfnmadd_ps: + case Intrinsic::x86_fma_vfnmadd_pd: + case Intrinsic::x86_fma_vfnmadd_ps_256: + case Intrinsic::x86_fma_vfnmadd_pd_256: + Opc = X86ISD::FNMADD; + break; + case Intrinsic::x86_fma_vfnmsub_ps: + case Intrinsic::x86_fma_vfnmsub_pd: + case Intrinsic::x86_fma_vfnmsub_ps_256: + case Intrinsic::x86_fma_vfnmsub_pd_256: + Opc = X86ISD::FNMSUB; + break; + case Intrinsic::x86_fma_vfmaddsub_ps: + case Intrinsic::x86_fma_vfmaddsub_pd: + case Intrinsic::x86_fma_vfmaddsub_ps_256: + case Intrinsic::x86_fma_vfmaddsub_pd_256: + Opc = X86ISD::FMADDSUB; + break; + case Intrinsic::x86_fma_vfmsubadd_ps: + case Intrinsic::x86_fma_vfmsubadd_pd: + case Intrinsic::x86_fma_vfmsubadd_ps_256: + case Intrinsic::x86_fma_vfmsubadd_pd_256: + Opc = X86ISD::FMSUBADD; + break; + } + + return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + } } } -SDValue -X86TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); switch (IntNo) { @@ -10072,21 +10545,21 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); + EVT PtrVT = getPointerTy(); if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = - DAG.getConstant(TD->getPointerSize(), - Subtarget->is64Bit() ? MVT::i64 : MVT::i32); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - DAG.getNode(ISD::ADD, dl, getPointerTy(), + DAG.getConstant(RegInfo->getSlotSize(), PtrVT); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), MachinePointerInfo(), false, false, false, 0); } // Just load the return address. SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, MachinePointerInfo(), false, false, false, 0); } @@ -10108,7 +10581,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const { - return DAG.getIntPtrConstant(2*TD->getPointerSize()); + return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize()); } SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { @@ -10123,7 +10596,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { unsigned StoreAddrReg = (Subtarget->is64Bit() ? 
X86::RCX : X86::ECX); SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame, - DAG.getIntPtrConstant(TD->getPointerSize())); + DAG.getIntPtrConstant(RegInfo->getSlotSize())); StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset); Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), false, false, 0); @@ -10134,8 +10607,22 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { Chain, DAG.getRegister(StoreAddrReg, getPointerTy())); } -SDValue X86TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, - SelectionDAG &DAG) const { +SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL, + DAG.getVTList(MVT::i32, MVT::Other), + Op.getOperand(0), Op.getOperand(1)); +} + +SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other, + Op.getOperand(0), Op.getOperand(1)); +} + +static SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) { return Op.getOperand(0); } @@ -10148,6 +10635,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, DebugLoc dl = Op.getDebugLoc(); const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); + const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo(); if (Subtarget->is64Bit()) { SDValue OutChains[6]; @@ -10156,8 +10644,8 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode. const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode. - const unsigned char N86R10 = X86_MC::getX86RegNum(X86::R10); - const unsigned char N86R11 = X86_MC::getX86RegNum(X86::R11); + const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7; + const unsigned char N86R11 = TRI->getEncodingValue(X86::R11) & 0x7; const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix @@ -10230,7 +10718,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, for (FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); I != E; ++I, ++Idx) - if (Attrs.paramHasAttr(Idx, Attribute::InReg)) + if (Attrs.getParamAttributes(Idx).hasAttribute(Attributes::InReg)) // FIXME: should only count parameters that are lowered to integers. InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32; @@ -10259,7 +10747,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, // This is storing the opcode for MOV32ri. const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte. 
- const unsigned char N86Reg = X86_MC::getX86RegNum(NestReg); + const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7; OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(MOV32ri|N86Reg, MVT::i8), Trmp, MachinePointerInfo(TrmpAddr), @@ -10358,7 +10846,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal); } -SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); EVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); @@ -10392,8 +10880,7 @@ SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const { return Op; } -SDValue X86TargetLowering::LowerCTLZ_ZERO_UNDEF(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); EVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); @@ -10418,7 +10905,7 @@ SDValue X86TargetLowering::LowerCTLZ_ZERO_UNDEF(SDValue Op, return Op; } -SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); unsigned NumBits = VT.getSizeInBits(); DebugLoc dl = Op.getDebugLoc(); @@ -10467,21 +10954,22 @@ static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) { DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2)); } -SDValue X86TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerADD(SDValue Op, SelectionDAG &DAG) { assert(Op.getValueType().is256BitVector() && Op.getValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); return Lower256IntArith(Op, DAG); } -SDValue X86TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) { assert(Op.getValueType().is256BitVector() && Op.getValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); return Lower256IntArith(Op, DAG); } -SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { EVT VT = Op.getValueType(); // Decompose 256-bit ops into smaller 128-bit ops. @@ -10756,7 +11244,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { // Lower the "add/sub/mul with overflow" instruction into a regular ins plus // a "setcc" instruction that checks the overflow flag. The "brcond" lowering // looks for this combo and may remove the "setcc" instruction if the "setcc" @@ -10871,7 +11359,7 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra); LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra); - return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);; + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2); } // fall through case MVT::v4i32: @@ -10884,7 +11372,8 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, } -SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{ +static SDValue LowerMEMBARRIER(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); // Go ahead and emit the fence on x86-64 even if we asked for no-sse2. 
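Several of the hunks above convert LowerADD, LowerSUB, and LowerMUL into static helpers that route 256-bit integer ops through Lower256IntArith, which extracts the two 128-bit halves of each operand, applies the operation per half, and concatenates the results. Below is a toy scalar model of that decomposition, assuming an 8 x i32 vector; the array types and all names are invented for illustration (the real code calls Extract128BitVector and builds a CONCAT_VECTORS node):

    // Hypothetical model of the 256-bit -> 2 x 128-bit split.
    #include <array>
    #include <cstdio>

    using V4 = std::array<int, 4>; // stands in for a 128-bit v4i32
    using V8 = std::array<int, 8>; // stands in for a 256-bit v8i32

    // The 128-bit "legal" operation applied to each half.
    static V4 add4(const V4 &A, const V4 &B) {
      V4 R;
      for (int i = 0; i != 4; ++i)
        R[i] = A[i] + B[i];
      return R;
    }

    int main() {
      V8 LHS = {1, 2, 3, 4, 5, 6, 7, 8};
      V8 RHS = {10, 20, 30, 40, 50, 60, 70, 80};

      // Extract the low and high halves of each operand.
      V4 LHS1 = {LHS[0], LHS[1], LHS[2], LHS[3]};
      V4 LHS2 = {LHS[4], LHS[5], LHS[6], LHS[7]};
      V4 RHS1 = {RHS[0], RHS[1], RHS[2], RHS[3]};
      V4 RHS2 = {RHS[4], RHS[5], RHS[6], RHS[7]};

      // Apply the op per half, then concatenate the results.
      V4 Lo = add4(LHS1, RHS1), Hi = add4(LHS2, RHS2);
      V8 Res;
      for (int i = 0; i != 4; ++i) {
        Res[i] = Lo[i];
        Res[i + 4] = Hi[i];
      }

      for (int v : Res)
        std::printf("%d ", v); // prints: 11 22 33 44 55 66 77 88
      std::printf("\n");
    }

Pre-AVX2 subtargets only have legal 128-bit integer vector ops, so splitting once at lowering time keeps each half on a type the instruction selector can match directly.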
@@ -10929,8 +11418,8 @@ SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{ return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); } -SDValue X86TargetLowering::LowerATOMIC_FENCE(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>( cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()); @@ -10968,7 +11457,8 @@ SDValue X86TargetLowering::LowerATOMIC_FENCE(SDValue Op, } -SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { EVT T = Op.getValueType(); DebugLoc DL = Op.getDebugLoc(); unsigned Reg = 0; @@ -10999,8 +11489,8 @@ SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const { return cpOut; } -SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { assert(Subtarget->is64Bit() && "Result not type legalized?"); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue TheChain = Op.getOperand(0); @@ -11018,8 +11508,7 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op, return DAG.getMergeValues(Ops, 2, dl); } -SDValue X86TargetLowering::LowerBITCAST(SDValue Op, - SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { EVT SrcVT = Op.getOperand(0).getValueType(); EVT DstVT = Op.getValueType(); assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() && @@ -11039,7 +11528,7 @@ SDValue X86TargetLowering::LowerBITCAST(SDValue Op, return SDValue(); } -SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); EVT T = Node->getValueType(0); @@ -11112,9 +11601,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG); - case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG); - case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op,DAG); - case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG); + case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, Subtarget, DAG); + case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG); + case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op, Subtarget, DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); @@ -11122,8 +11611,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); - case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); - case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG); + case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG); + case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG); case ISD::SCALAR_TO_VECTOR: return 
LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); @@ -11135,8 +11624,11 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG); case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); + case ISD::TRUNCATE: return lowerTRUNCATE(Op, DAG); + case ISD::ZERO_EXTEND: return lowerZERO_EXTEND(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); + case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG); case ISD::FABS: return LowerFABS(Op, DAG); case ISD::FNEG: return LowerFNEG(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); @@ -11147,7 +11639,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); - case ISD::VACOPY: return LowerVACOPY(Op, DAG); + case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); @@ -11156,13 +11648,15 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); + case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); + case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); case ISD::CTLZ: return LowerCTLZ(Op, DAG); case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op, DAG); - case ISD::MUL: return LowerMUL(Op, DAG); + case ISD::MUL: return LowerMUL(Op, Subtarget, DAG); case ISD::SRA: case ISD::SRL: case ISD::SHL: return LowerShift(Op, DAG); @@ -11172,7 +11666,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::USUBO: case ISD::SMULO: case ISD::UMULO: return LowerXALUO(Op, DAG); - case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG); + case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG); case ISD::BITCAST: return LowerBITCAST(Op, DAG); case ISD::ADDC: case ISD::ADDE: @@ -11265,6 +11759,27 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, } return; } + case ISD::UINT_TO_FP: { + if (N->getOperand(0).getValueType() != MVT::v2i32 && + N->getValueType(0) != MVT::v2f32) + return; + SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, + N->getOperand(0)); + SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), + MVT::f64); + SDValue VBias = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2f64, Bias, Bias); + SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn, + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, VBias)); + Or = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or); + SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias); + Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub)); + return; + } + case ISD::FP_ROUND: { + SDValue V = 
DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0)); + Results.push_back(V); + return; + } case ISD::READCYCLECOUNTER: { SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue TheChain = N->getOperand(0); @@ -11332,6 +11847,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_LOAD_UMIN: case ISD::ATOMIC_SWAP: { unsigned Opc; switch (N->getOpcode()) { @@ -11354,6 +11873,18 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, case ISD::ATOMIC_LOAD_XOR: Opc = X86ISD::ATOMXOR64_DAG; break; + case ISD::ATOMIC_LOAD_MAX: + Opc = X86ISD::ATOMMAX64_DAG; + break; + case ISD::ATOMIC_LOAD_MIN: + Opc = X86ISD::ATOMMIN64_DAG; + break; + case ISD::ATOMIC_LOAD_UMAX: + Opc = X86ISD::ATOMUMAX64_DAG; + break; + case ISD::ATOMIC_LOAD_UMIN: + Opc = X86ISD::ATOMUMIN64_DAG; + break; case ISD::ATOMIC_SWAP: Opc = X86ISD::ATOMSWAP64_DAG; break; @@ -11420,11 +11951,15 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FHSUB: return "X86ISD::FHSUB"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; + case X86ISD::FMAXC: return "X86ISD::FMAXC"; + case X86ISD::FMINC: return "X86ISD::FMINC"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; case X86ISD::FRCP: return "X86ISD::FRCP"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR"; case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; + case X86ISD::EH_SJLJ_SETJMP: return "X86ISD::EH_SJLJ_SETJMP"; + case X86ISD::EH_SJLJ_LONGJMP: return "X86ISD::EH_SJLJ_LONGJMP"; case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m"; @@ -11440,7 +11975,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL"; case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL"; case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD"; + case X86ISD::VZEXT: return "X86ISD::VZEXT"; + case X86ISD::VSEXT: return "X86ISD::VSEXT"; case X86ISD::VFPEXT: return "X86ISD::VFPEXT"; + case X86ISD::VFPROUND: return "X86ISD::VFPROUND"; case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ"; case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ"; case X86ISD::VSHL: return "X86ISD::VSHL"; @@ -11507,6 +12045,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FNMSUB: return "X86ISD::FNMSUB"; case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB"; case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD"; + case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI"; + case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI"; } } @@ -11655,430 +12195,724 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, // X86 Scheduler Hooks //===----------------------------------------------------------------------===// -// private utility function -MachineBasicBlock * -X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, - MachineBasicBlock *MBB, - unsigned regOpc, - unsigned immOpc, - unsigned LoadOpc, - unsigned CXchgOpc, - unsigned notOpc, - unsigned EAXreg, - const TargetRegisterClass *RC, - bool Invert) const { - // For the atomic bitwise operator, we generate - // thisMBB: - // newMBB: - // ld t1 = [bitinstr.addr] - // op t2 = t1, [bitinstr.val] - // not t3 = t2 (if Invert) - // mov EAX = t1 
- // lcs dest = [bitinstr.addr], t3 [EAX is implicit] - // bz newMBB - // fallthrough -->nextMBB - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - const BasicBlock *LLVM_BB = MBB->getBasicBlock(); - MachineFunction::iterator MBBIter = MBB; - ++MBBIter; +/// Utility function to emit xbegin specifying the start of an RTM region. +static MachineBasicBlock *EmitXBegin(MachineInstr *MI, MachineBasicBlock *MBB, + const TargetInstrInfo *TII) { + DebugLoc DL = MI->getDebugLoc(); + + const BasicBlock *BB = MBB->getBasicBlock(); + MachineFunction::iterator I = MBB; + ++I; + + // For the v = xbegin(), we generate + // + // thisMBB: + // xbegin sinkMBB + // + // mainMBB: + // eax = -1 + // + // sinkMBB: + // v = eax - /// First build the CFG - MachineFunction *F = MBB->getParent(); MachineBasicBlock *thisMBB = MBB; - MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(MBBIter, newMBB); - F->insert(MBBIter, nextMBB); - - // Transfer the remainder of thisMBB and its successor edges to nextMBB. - nextMBB->splice(nextMBB->begin(), thisMBB, - llvm::next(MachineBasicBlock::iterator(bInstr)), - thisMBB->end()); - nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB); - - // Update thisMBB to fall through to newMBB - thisMBB->addSuccessor(newMBB); - - // newMBB jumps to itself and fall through to nextMBB - newMBB->addSuccessor(nextMBB); - newMBB->addSuccessor(newMBB); - - // Insert instructions into newMBB based on incoming instruction - assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 && - "unexpected number of operands"); - DebugLoc dl = bInstr->getDebugLoc(); - MachineOperand& destOper = bInstr->getOperand(0); - MachineOperand* argOpers[2 + X86::AddrNumOperands]; - int numArgs = bInstr->getNumOperands() - 1; - for (int i=0; i < numArgs; ++i) - argOpers[i] = &bInstr->getOperand(i+1); - - // x86 address has 4 operands: base, index, scale, and displacement - int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3] - int valArgIndx = lastAddrIndx + 1; - - unsigned t1 = F->getRegInfo().createVirtualRegister(RC); - MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(LoadOpc), t1); - for (int i=0; i <= lastAddrIndx; ++i) - (*MIB).addOperand(*argOpers[i]); - - unsigned t2 = F->getRegInfo().createVirtualRegister(RC); - assert((argOpers[valArgIndx]->isReg() || - argOpers[valArgIndx]->isImm()) && - "invalid operand"); - if (argOpers[valArgIndx]->isReg()) - MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2); - else - MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2); - MIB.addReg(t1); - (*MIB).addOperand(*argOpers[valArgIndx]); + MachineFunction *MF = MBB->getParent(); + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(I, mainMBB); + MF->insert(I, sinkMBB); - unsigned t3 = F->getRegInfo().createVirtualRegister(RC); - if (Invert) { - MIB = BuildMI(newMBB, dl, TII->get(notOpc), t3).addReg(t2); - } - else - t3 = t2; + // Transfer the remainder of BB and its successor edges to sinkMBB. 
+  sinkMBB->splice(sinkMBB->begin(), MBB,
+                  llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+  // thisMBB:
+  //  xbegin sinkMBB
+  //  # fallthrough to mainMBB
+  //  # abort to sinkMBB
+  BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(sinkMBB);
+  thisMBB->addSuccessor(mainMBB);
+  thisMBB->addSuccessor(sinkMBB);
+
+  // mainMBB:
+  //  EAX = -1
+  BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), X86::EAX).addImm(-1);
+  mainMBB->addSuccessor(sinkMBB);
+
+  // sinkMBB:
+  // EAX is live into the sinkMBB
+  sinkMBB->addLiveIn(X86::EAX);
+  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+          TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
+    .addReg(X86::EAX);
 
-  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
-  MIB.addReg(t1);
+  MI->eraseFromParent();
+  return sinkMBB;
+}
 
-  MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
-  for (int i=0; i <= lastAddrIndx; ++i)
-    (*MIB).addOperand(*argOpers[i]);
-  MIB.addReg(t3);
-  assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
-  (*MIB).setMemRefs(bInstr->memoperands_begin(),
-                    bInstr->memoperands_end());
+// Get CMPXCHG opcode for the specified data type.
+static unsigned getCmpXChgOpcode(EVT VT) {
+  switch (VT.getSimpleVT().SimpleTy) {
+  case MVT::i8:  return X86::LCMPXCHG8;
+  case MVT::i16: return X86::LCMPXCHG16;
+  case MVT::i32: return X86::LCMPXCHG32;
+  case MVT::i64: return X86::LCMPXCHG64;
+  default:
+    break;
+  }
+  llvm_unreachable("Invalid operand size!");
+}
 
-  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
-  MIB.addReg(EAXreg);
+// Get LOAD opcode for the specified data type.
+static unsigned getLoadOpcode(EVT VT) {
+  switch (VT.getSimpleVT().SimpleTy) {
+  case MVT::i8:  return X86::MOV8rm;
+  case MVT::i16: return X86::MOV16rm;
+  case MVT::i32: return X86::MOV32rm;
+  case MVT::i64: return X86::MOV64rm;
+  default:
+    break;
+  }
+  llvm_unreachable("Invalid operand size!");
+}
 
-  // insert branch
-  BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+// Get opcode of the non-atomic one from the specified atomic instruction.
+static unsigned getNonAtomicOpcode(unsigned Opc) {
+  switch (Opc) {
+  case X86::ATOMAND8:  return X86::AND8rr;
+  case X86::ATOMAND16: return X86::AND16rr;
+  case X86::ATOMAND32: return X86::AND32rr;
+  case X86::ATOMAND64: return X86::AND64rr;
+  case X86::ATOMOR8:   return X86::OR8rr;
+  case X86::ATOMOR16:  return X86::OR16rr;
+  case X86::ATOMOR32:  return X86::OR32rr;
+  case X86::ATOMOR64:  return X86::OR64rr;
+  case X86::ATOMXOR8:  return X86::XOR8rr;
+  case X86::ATOMXOR16: return X86::XOR16rr;
+  case X86::ATOMXOR32: return X86::XOR32rr;
+  case X86::ATOMXOR64: return X86::XOR64rr;
+  }
+  llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get opcode of the non-atomic one from the specified atomic instruction with
+// extra opcode.
+static unsigned getNonAtomicOpcodeWithExtraOpc(unsigned Opc, + unsigned &ExtraOpc) { + switch (Opc) { + case X86::ATOMNAND8: ExtraOpc = X86::NOT8r; return X86::AND8rr; + case X86::ATOMNAND16: ExtraOpc = X86::NOT16r; return X86::AND16rr; + case X86::ATOMNAND32: ExtraOpc = X86::NOT32r; return X86::AND32rr; + case X86::ATOMNAND64: ExtraOpc = X86::NOT64r; return X86::AND64rr; + case X86::ATOMMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVL32rr; + case X86::ATOMMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVL16rr; + case X86::ATOMMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVL32rr; + case X86::ATOMMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVL64rr; + case X86::ATOMMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVG32rr; + case X86::ATOMMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVG16rr; + case X86::ATOMMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVG32rr; + case X86::ATOMMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVG64rr; + case X86::ATOMUMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVB32rr; + case X86::ATOMUMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVB16rr; + case X86::ATOMUMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVB32rr; + case X86::ATOMUMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVB64rr; + case X86::ATOMUMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVA32rr; + case X86::ATOMUMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVA16rr; + case X86::ATOMUMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVA32rr; + case X86::ATOMUMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVA64rr; + } + llvm_unreachable("Unhandled atomic-load-op opcode!"); +} + +// Get opcode of the non-atomic one from the specified atomic instruction for +// 64-bit data type on 32-bit target. +static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) { + switch (Opc) { + case X86::ATOMAND6432: HiOpc = X86::AND32rr; return X86::AND32rr; + case X86::ATOMOR6432: HiOpc = X86::OR32rr; return X86::OR32rr; + case X86::ATOMXOR6432: HiOpc = X86::XOR32rr; return X86::XOR32rr; + case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr; + case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr; + case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr; + case X86::ATOMMAX6432: HiOpc = X86::SETLr; return X86::SETLr; + case X86::ATOMMIN6432: HiOpc = X86::SETGr; return X86::SETGr; + case X86::ATOMUMAX6432: HiOpc = X86::SETBr; return X86::SETBr; + case X86::ATOMUMIN6432: HiOpc = X86::SETAr; return X86::SETAr; + } + llvm_unreachable("Unhandled atomic-load-op opcode!"); +} + +// Get opcode of the non-atomic one from the specified atomic instruction for +// 64-bit data type on 32-bit target with extra opcode. +static unsigned getNonAtomic6432OpcodeWithExtraOpc(unsigned Opc, + unsigned &HiOpc, + unsigned &ExtraOpc) { + switch (Opc) { + case X86::ATOMNAND6432: + ExtraOpc = X86::NOT32r; + HiOpc = X86::AND32rr; + return X86::AND32rr; + } + llvm_unreachable("Unhandled atomic-load-op opcode!"); +} - bInstr->eraseFromParent(); // The pseudo instruction is gone now. - return nextMBB; +// Get pseudo CMOV opcode from the specified data type. +static unsigned getPseudoCMOVOpc(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { + case MVT::i8: return X86::CMOV_GR8; + case MVT::i16: return X86::CMOV_GR16; + case MVT::i32: return X86::CMOV_GR32; + default: + break; + } + llvm_unreachable("Unknown CMOV opcode!"); } -// private utility function: 64 bit atomics on 32 bit host. +// EmitAtomicLoadArith - emit the code sequence for pseudo atomic instructions. 
+// They will be translated into a spin-loop or compare-exchange loop from +// +// ... +// dst = atomic-fetch-op MI.addr, MI.val +// ... +// +// to +// +// ... +// EAX = LOAD MI.addr +// loop: +// t1 = OP MI.val, EAX +// LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined] +// JNE loop +// sink: +// dst = EAX +// ... MachineBasicBlock * -X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, - MachineBasicBlock *MBB, - unsigned regOpcL, - unsigned regOpcH, - unsigned immOpcL, - unsigned immOpcH, - bool Invert) const { - // For the atomic bitwise operator, we generate - // thisMBB (instructions are in pairs, except cmpxchg8b) - // ld t1,t2 = [bitinstr.addr] - // newMBB: - // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4) - // op t5, t6 <- out1, out2, [bitinstr.val] - // (for SWAP, substitute: mov t5, t6 <- [bitinstr.val]) - // neg t7, t8 < t5, t6 (if Invert) - // mov ECX, EBX <- t5, t6 - // mov EAX, EDX <- t1, t2 - // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit] - // mov t3, t4 <- EAX, EDX - // bz newMBB - // result in out1, out2 - // fallthrough -->nextMBB - - const TargetRegisterClass *RC = &X86::GR32RegClass; - const unsigned LoadOpc = X86::MOV32rm; - const unsigned NotOpc = X86::NOT32r; +X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, + MachineBasicBlock *MBB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - const BasicBlock *LLVM_BB = MBB->getBasicBlock(); - MachineFunction::iterator MBBIter = MBB; - ++MBBIter; + DebugLoc DL = MI->getDebugLoc(); + + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + const BasicBlock *BB = MBB->getBasicBlock(); + MachineFunction::iterator I = MBB; + ++I; + + assert(MI->getNumOperands() <= X86::AddrNumOperands + 2 && + "Unexpected number of operands"); + + assert(MI->hasOneMemOperand() && + "Expected atomic-load-op to have one memoperand"); + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + unsigned DstReg, SrcReg; + unsigned MemOpndSlot; + + unsigned CurOp = 0; + + DstReg = MI->getOperand(CurOp++).getReg(); + MemOpndSlot = CurOp; + CurOp += X86::AddrNumOperands; + SrcReg = MI->getOperand(CurOp++).getReg(); + + const TargetRegisterClass *RC = MRI.getRegClass(DstReg); + MVT::SimpleValueType VT = *RC->vt_begin(); + unsigned AccPhyReg = getX86SubSuperRegister(X86::EAX, VT); + + unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT); + unsigned LOADOpc = getLoadOpcode(VT); + + // For the atomic load-arith operator, we generate + // + // thisMBB: + // EAX = LOAD [MI.addr] + // mainMBB: + // t1 = OP MI.val, EAX + // LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined] + // JNE mainMBB + // sinkMBB: - /// First build the CFG - MachineFunction *F = MBB->getParent(); MachineBasicBlock *thisMBB = MBB; - MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(MBBIter, newMBB); - F->insert(MBBIter, nextMBB); - - // Transfer the remainder of thisMBB and its successor edges to nextMBB. 
- nextMBB->splice(nextMBB->begin(), thisMBB, - llvm::next(MachineBasicBlock::iterator(bInstr)), - thisMBB->end()); - nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB); - - // Update thisMBB to fall through to newMBB - thisMBB->addSuccessor(newMBB); - - // newMBB jumps to itself and fall through to nextMBB - newMBB->addSuccessor(nextMBB); - newMBB->addSuccessor(newMBB); - - DebugLoc dl = bInstr->getDebugLoc(); - // Insert instructions into newMBB based on incoming instruction - // There are 8 "real" operands plus 9 implicit def/uses, ignored here. - assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 && - "unexpected number of operands"); - MachineOperand& dest1Oper = bInstr->getOperand(0); - MachineOperand& dest2Oper = bInstr->getOperand(1); - MachineOperand* argOpers[2 + X86::AddrNumOperands]; - for (int i=0; i < 2 + X86::AddrNumOperands; ++i) { - argOpers[i] = &bInstr->getOperand(i+2); - - // We use some of the operands multiple times, so conservatively just - // clear any kill flags that might be present. - if (argOpers[i]->isReg() && argOpers[i]->isUse()) - argOpers[i]->setIsKill(false); - } - - // x86 address has 5 operands: base, index, scale, displacement, and segment. - int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3] - - unsigned t1 = F->getRegInfo().createVirtualRegister(RC); - MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1); - for (int i=0; i <= lastAddrIndx; ++i) - (*MIB).addOperand(*argOpers[i]); - unsigned t2 = F->getRegInfo().createVirtualRegister(RC); - MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t2); - // add 4 to displacement. - for (int i=0; i <= lastAddrIndx-2; ++i) - (*MIB).addOperand(*argOpers[i]); - MachineOperand newOp3 = *(argOpers[3]); - if (newOp3.isImm()) - newOp3.setImm(newOp3.getImm()+4); - else - newOp3.setOffset(newOp3.getOffset()+4); - (*MIB).addOperand(newOp3); - (*MIB).addOperand(*argOpers[lastAddrIndx]); - - // t3/4 are defined later, at the bottom of the loop - unsigned t3 = F->getRegInfo().createVirtualRegister(RC); - unsigned t4 = F->getRegInfo().createVirtualRegister(RC); - BuildMI(newMBB, dl, TII->get(X86::PHI), dest1Oper.getReg()) - .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB); - BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg()) - .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB); - - // The subsequent operations should be using the destination registers of - // the PHI instructions. 
- t1 = dest1Oper.getReg(); - t2 = dest2Oper.getReg(); - - int valArgIndx = lastAddrIndx + 1; - assert((argOpers[valArgIndx]->isReg() || - argOpers[valArgIndx]->isImm()) && - "invalid operand"); - unsigned t5 = F->getRegInfo().createVirtualRegister(RC); - unsigned t6 = F->getRegInfo().createVirtualRegister(RC); - if (argOpers[valArgIndx]->isReg()) - MIB = BuildMI(newMBB, dl, TII->get(regOpcL), t5); - else - MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5); - if (regOpcL != X86::MOV32rr) - MIB.addReg(t1); - (*MIB).addOperand(*argOpers[valArgIndx]); - assert(argOpers[valArgIndx + 1]->isReg() == - argOpers[valArgIndx]->isReg()); - assert(argOpers[valArgIndx + 1]->isImm() == - argOpers[valArgIndx]->isImm()); - if (argOpers[valArgIndx + 1]->isReg()) - MIB = BuildMI(newMBB, dl, TII->get(regOpcH), t6); - else - MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6); - if (regOpcH != X86::MOV32rr) - MIB.addReg(t2); - (*MIB).addOperand(*argOpers[valArgIndx + 1]); - - unsigned t7, t8; - if (Invert) { - t7 = F->getRegInfo().createVirtualRegister(RC); - t8 = F->getRegInfo().createVirtualRegister(RC); - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t7).addReg(t5); - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t8).addReg(t6); - } else { - t7 = t5; - t8 = t6; + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(I, mainMBB); + MF->insert(I, sinkMBB); + + MachineInstrBuilder MIB; + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), MBB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + + // thisMBB: + MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), AccPhyReg); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) + MIB.addOperand(MI->getOperand(MemOpndSlot + i)); + MIB.setMemRefs(MMOBegin, MMOEnd); + + thisMBB->addSuccessor(mainMBB); + + // mainMBB: + MachineBasicBlock *origMainMBB = mainMBB; + mainMBB->addLiveIn(AccPhyReg); + + // Copy AccPhyReg as it is used more than once. 
+ unsigned AccReg = MRI.createVirtualRegister(RC); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccReg) + .addReg(AccPhyReg); + + unsigned t1 = MRI.createVirtualRegister(RC); + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: + llvm_unreachable("Unhandled atomic-load-op opcode!"); + case X86::ATOMAND8: + case X86::ATOMAND16: + case X86::ATOMAND32: + case X86::ATOMAND64: + case X86::ATOMOR8: + case X86::ATOMOR16: + case X86::ATOMOR32: + case X86::ATOMOR64: + case X86::ATOMXOR8: + case X86::ATOMXOR16: + case X86::ATOMXOR32: + case X86::ATOMXOR64: { + unsigned ARITHOpc = getNonAtomicOpcode(Opc); + BuildMI(mainMBB, DL, TII->get(ARITHOpc), t1).addReg(SrcReg) + .addReg(AccReg); + break; + } + case X86::ATOMNAND8: + case X86::ATOMNAND16: + case X86::ATOMNAND32: + case X86::ATOMNAND64: { + unsigned t2 = MRI.createVirtualRegister(RC); + unsigned NOTOpc; + unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc); + BuildMI(mainMBB, DL, TII->get(ANDOpc), t2).addReg(SrcReg) + .addReg(AccReg); + BuildMI(mainMBB, DL, TII->get(NOTOpc), t1).addReg(t2); + break; + } + case X86::ATOMMAX8: + case X86::ATOMMAX16: + case X86::ATOMMAX32: + case X86::ATOMMAX64: + case X86::ATOMMIN8: + case X86::ATOMMIN16: + case X86::ATOMMIN32: + case X86::ATOMMIN64: + case X86::ATOMUMAX8: + case X86::ATOMUMAX16: + case X86::ATOMUMAX32: + case X86::ATOMUMAX64: + case X86::ATOMUMIN8: + case X86::ATOMUMIN16: + case X86::ATOMUMIN32: + case X86::ATOMUMIN64: { + unsigned CMPOpc; + unsigned CMOVOpc = getNonAtomicOpcodeWithExtraOpc(Opc, CMPOpc); + + BuildMI(mainMBB, DL, TII->get(CMPOpc)) + .addReg(SrcReg) + .addReg(AccReg); + + if (Subtarget->hasCMov()) { + if (VT != MVT::i8) { + // Native support + BuildMI(mainMBB, DL, TII->get(CMOVOpc), t1) + .addReg(SrcReg) + .addReg(AccReg); + } else { + // Promote i8 to i32 to use CMOV32 + const TargetRegisterClass *RC32 = getRegClassFor(MVT::i32); + unsigned SrcReg32 = MRI.createVirtualRegister(RC32); + unsigned AccReg32 = MRI.createVirtualRegister(RC32); + unsigned t2 = MRI.createVirtualRegister(RC32); + + unsigned Undef = MRI.createVirtualRegister(RC32); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef); + + BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), SrcReg32) + .addReg(Undef) + .addReg(SrcReg) + .addImm(X86::sub_8bit); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), AccReg32) + .addReg(Undef) + .addReg(AccReg) + .addImm(X86::sub_8bit); + + BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2) + .addReg(SrcReg32) + .addReg(AccReg32); + + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t1) + .addReg(t2, 0, X86::sub_8bit); + } + } else { + // Use pseudo select and lower them. + assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && + "Invalid atomic-load-op transformation!"); + unsigned SelOpc = getPseudoCMOVOpc(VT); + X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc); + assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!"); + MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t1) + .addReg(SrcReg).addReg(AccReg) + .addImm(CC); + mainMBB = EmitLoweredSelect(MIB, mainMBB); + } + break; + } } - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX); - MIB.addReg(t1); - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX); - MIB.addReg(t2); + // Copy AccPhyReg back from virtual register. 
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccPhyReg) + .addReg(AccReg); - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX); - MIB.addReg(t7); - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX); - MIB.addReg(t8); + MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc)); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) + MIB.addOperand(MI->getOperand(MemOpndSlot + i)); + MIB.addReg(t1); + MIB.setMemRefs(MMOBegin, MMOEnd); - MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B)); - for (int i=0; i <= lastAddrIndx; ++i) - (*MIB).addOperand(*argOpers[i]); + BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB); - assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand"); - (*MIB).setMemRefs(bInstr->memoperands_begin(), - bInstr->memoperands_end()); + mainMBB->addSuccessor(origMainMBB); + mainMBB->addSuccessor(sinkMBB); - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3); - MIB.addReg(X86::EAX); - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4); - MIB.addReg(X86::EDX); + // sinkMBB: + sinkMBB->addLiveIn(AccPhyReg); - // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); + BuildMI(*sinkMBB, sinkMBB->begin(), DL, + TII->get(TargetOpcode::COPY), DstReg) + .addReg(AccPhyReg); - bInstr->eraseFromParent(); // The pseudo instruction is gone now. - return nextMBB; + MI->eraseFromParent(); + return sinkMBB; } -// private utility function +// EmitAtomicLoadArith6432 - emit the code sequence for pseudo atomic +// instructions. They will be translated into a spin-loop or compare-exchange +// loop from +// +// ... +// dst = atomic-fetch-op MI.addr, MI.val +// ... +// +// to +// +// ... +// EAX = LOAD [MI.addr + 0] +// EDX = LOAD [MI.addr + 4] +// loop: +// EBX = OP MI.val.lo, EAX +// ECX = OP MI.val.hi, EDX +// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined] +// JNE loop +// sink: +// dst = EDX:EAX +// ... 
MachineBasicBlock * -X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, - MachineBasicBlock *MBB, - unsigned cmovOpc) const { - // For the atomic min/max operator, we generate - // thisMBB: - // newMBB: - // ld t1 = [min/max.addr] - // mov t2 = [min/max.val] - // cmp t1, t2 - // cmov[cond] t2 = t1 - // mov EAX = t1 - // lcs dest = [bitinstr.addr], t2 [EAX is implicit] - // bz newMBB - // fallthrough -->nextMBB - // +X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, + MachineBasicBlock *MBB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - const BasicBlock *LLVM_BB = MBB->getBasicBlock(); - MachineFunction::iterator MBBIter = MBB; - ++MBBIter; + DebugLoc DL = MI->getDebugLoc(); + + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + const BasicBlock *BB = MBB->getBasicBlock(); + MachineFunction::iterator I = MBB; + ++I; + + assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 && + "Unexpected number of operands"); + + assert(MI->hasOneMemOperand() && + "Expected atomic-load-op32 to have one memoperand"); + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + unsigned DstLoReg, DstHiReg; + unsigned SrcLoReg, SrcHiReg; + unsigned MemOpndSlot; + + unsigned CurOp = 0; + + DstLoReg = MI->getOperand(CurOp++).getReg(); + DstHiReg = MI->getOperand(CurOp++).getReg(); + MemOpndSlot = CurOp; + CurOp += X86::AddrNumOperands; + SrcLoReg = MI->getOperand(CurOp++).getReg(); + SrcHiReg = MI->getOperand(CurOp++).getReg(); + + const TargetRegisterClass *RC = &X86::GR32RegClass; + const TargetRegisterClass *RC8 = &X86::GR8RegClass; + + unsigned LCMPXCHGOpc = X86::LCMPXCHG8B; + unsigned LOADOpc = X86::MOV32rm; + + // For the atomic load-arith operator, we generate + // + // thisMBB: + // EAX = LOAD [MI.addr + 0] + // EDX = LOAD [MI.addr + 4] + // mainMBB: + // EBX = OP MI.vallo, EAX + // ECX = OP MI.valhi, EDX + // LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined] + // JNE mainMBB + // sinkMBB: - /// First build the CFG - MachineFunction *F = MBB->getParent(); MachineBasicBlock *thisMBB = MBB; - MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(MBBIter, newMBB); - F->insert(MBBIter, nextMBB); - - // Transfer the remainder of thisMBB and its successor edges to nextMBB. 
- nextMBB->splice(nextMBB->begin(), thisMBB, - llvm::next(MachineBasicBlock::iterator(mInstr)), - thisMBB->end()); - nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB); - - // Update thisMBB to fall through to newMBB - thisMBB->addSuccessor(newMBB); - - // newMBB jumps to newMBB and fall through to nextMBB - newMBB->addSuccessor(nextMBB); - newMBB->addSuccessor(newMBB); - - DebugLoc dl = mInstr->getDebugLoc(); - // Insert instructions into newMBB based on incoming instruction - assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 && - "unexpected number of operands"); - MachineOperand& destOper = mInstr->getOperand(0); - MachineOperand* argOpers[2 + X86::AddrNumOperands]; - int numArgs = mInstr->getNumOperands() - 1; - for (int i=0; i < numArgs; ++i) - argOpers[i] = &mInstr->getOperand(i+1); - - // x86 address has 4 operands: base, index, scale, and displacement - int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3] - int valArgIndx = lastAddrIndx + 1; - - unsigned t1 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass); - MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rm), t1); - for (int i=0; i <= lastAddrIndx; ++i) - (*MIB).addOperand(*argOpers[i]); - - // We only support register and immediate values - assert((argOpers[valArgIndx]->isReg() || - argOpers[valArgIndx]->isImm()) && - "invalid operand"); - - unsigned t2 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass); - if (argOpers[valArgIndx]->isReg()) - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2); - else - MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2); - (*MIB).addOperand(*argOpers[valArgIndx]); + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(I, mainMBB); + MF->insert(I, sinkMBB); - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX); - MIB.addReg(t1); + MachineInstrBuilder MIB; - MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr)); - MIB.addReg(t1); - MIB.addReg(t2); + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), MBB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + + // thisMBB: + // Lo + MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EAX); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) + MIB.addOperand(MI->getOperand(MemOpndSlot + i)); + MIB.setMemRefs(MMOBegin, MMOEnd); + // Hi + MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EDX); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { + if (i == X86::AddrDisp) + MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32) + else + MIB.addOperand(MI->getOperand(MemOpndSlot + i)); + } + MIB.setMemRefs(MMOBegin, MMOEnd); - // Generate movc - unsigned t3 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass); - MIB = BuildMI(newMBB, dl, TII->get(cmovOpc),t3); - MIB.addReg(t2); - MIB.addReg(t1); + thisMBB->addSuccessor(mainMBB); + + // mainMBB: + MachineBasicBlock *origMainMBB = mainMBB; + mainMBB->addLiveIn(X86::EAX); + mainMBB->addLiveIn(X86::EDX); + + // Copy EDX:EAX as they are used more than once. 
+ unsigned LoReg = MRI.createVirtualRegister(RC); + unsigned HiReg = MRI.createVirtualRegister(RC); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), LoReg).addReg(X86::EAX); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), HiReg).addReg(X86::EDX); + + unsigned t1L = MRI.createVirtualRegister(RC); + unsigned t1H = MRI.createVirtualRegister(RC); + + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: + llvm_unreachable("Unhandled atomic-load-op6432 opcode!"); + case X86::ATOMAND6432: + case X86::ATOMOR6432: + case X86::ATOMXOR6432: + case X86::ATOMADD6432: + case X86::ATOMSUB6432: { + unsigned HiOpc; + unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); + BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(LoReg).addReg(SrcLoReg); + BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(HiReg).addReg(SrcHiReg); + break; + } + case X86::ATOMNAND6432: { + unsigned HiOpc, NOTOpc; + unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc); + unsigned t2L = MRI.createVirtualRegister(RC); + unsigned t2H = MRI.createVirtualRegister(RC); + BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg).addReg(LoReg); + BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg).addReg(HiReg); + BuildMI(mainMBB, DL, TII->get(NOTOpc), t1L).addReg(t2L); + BuildMI(mainMBB, DL, TII->get(NOTOpc), t1H).addReg(t2H); + break; + } + case X86::ATOMMAX6432: + case X86::ATOMMIN6432: + case X86::ATOMUMAX6432: + case X86::ATOMUMIN6432: { + unsigned HiOpc; + unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); + unsigned cL = MRI.createVirtualRegister(RC8); + unsigned cH = MRI.createVirtualRegister(RC8); + unsigned cL32 = MRI.createVirtualRegister(RC); + unsigned cH32 = MRI.createVirtualRegister(RC); + unsigned cc = MRI.createVirtualRegister(RC); + // cl := cmp src_lo, lo + BuildMI(mainMBB, DL, TII->get(X86::CMP32rr)) + .addReg(SrcLoReg).addReg(LoReg); + BuildMI(mainMBB, DL, TII->get(LoOpc), cL); + BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL); + // ch := cmp src_hi, hi + BuildMI(mainMBB, DL, TII->get(X86::CMP32rr)) + .addReg(SrcHiReg).addReg(HiReg); + BuildMI(mainMBB, DL, TII->get(HiOpc), cH); + BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH); + // cc := if (src_hi == hi) ? 
cl : ch; + if (Subtarget->hasCMov()) { + BuildMI(mainMBB, DL, TII->get(X86::CMOVE32rr), cc) + .addReg(cH32).addReg(cL32); + } else { + MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), cc) + .addReg(cH32).addReg(cL32) + .addImm(X86::COND_E); + mainMBB = EmitLoweredSelect(MIB, mainMBB); + } + BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc); + if (Subtarget->hasCMov()) { + BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1L) + .addReg(SrcLoReg).addReg(LoReg); + BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1H) + .addReg(SrcHiReg).addReg(HiReg); + } else { + MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1L) + .addReg(SrcLoReg).addReg(LoReg) + .addImm(X86::COND_NE); + mainMBB = EmitLoweredSelect(MIB, mainMBB); + MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1H) + .addReg(SrcHiReg).addReg(HiReg) + .addImm(X86::COND_NE); + mainMBB = EmitLoweredSelect(MIB, mainMBB); + } + break; + } + case X86::ATOMSWAP6432: { + unsigned HiOpc; + unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); + BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg); + BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg); + break; + } + } + + // Copy EDX:EAX back from HiReg:LoReg + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(LoReg); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(HiReg); + // Copy ECX:EBX from t1H:t1L + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t1L); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t1H); + + MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc)); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) + MIB.addOperand(MI->getOperand(MemOpndSlot + i)); + MIB.setMemRefs(MMOBegin, MMOEnd); - // Cmp and exchange if none has modified the memory location - MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG32)); - for (int i=0; i <= lastAddrIndx; ++i) - (*MIB).addOperand(*argOpers[i]); - MIB.addReg(t3); - assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperand"); - (*MIB).setMemRefs(mInstr->memoperands_begin(), - mInstr->memoperands_end()); + BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB); - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg()); - MIB.addReg(X86::EAX); + mainMBB->addSuccessor(origMainMBB); + mainMBB->addSuccessor(sinkMBB); - // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); + // sinkMBB: + sinkMBB->addLiveIn(X86::EAX); + sinkMBB->addLiveIn(X86::EDX); + + BuildMI(*sinkMBB, sinkMBB->begin(), DL, + TII->get(TargetOpcode::COPY), DstLoReg) + .addReg(X86::EAX); + BuildMI(*sinkMBB, sinkMBB->begin(), DL, + TII->get(TargetOpcode::COPY), DstHiReg) + .addReg(X86::EDX); - mInstr->eraseFromParent(); // The pseudo instruction is gone now. - return nextMBB; + MI->eraseFromParent(); + return sinkMBB; } // FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8 // or XMM0_V32I8 in AVX all of this code can be replaced with that // in the .td file. 
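
A note on the atomic lowering above before moving on: every per-width atomic pseudo (ATOMAND8 through ATOMUMIN64) now funnels into one generic load / non-atomic op / LCMPXCHG / JNE loop. The sketch below is a plain C++ rendering of the semantics of that loop, using fetch-max as the example; it is illustrative only, not part of the patch, and the function name atomic_fetch_max is made up:

    #include <atomic>

    // Equivalent logic of the emitted loop for, e.g., ATOMMAX32. The patch
    // builds this out of MachineInstrs (MOV32rm, CMP+CMOV, LCMPXCHG32, JNE).
    int atomic_fetch_max(std::atomic<int> &Mem, int Val) {
      int Old = Mem.load();                       // EAX = LOAD [MI.addr]
      for (;;) {                                  // loop:
        int New = Val > Old ? Val : Old;          //   t1 = CMP/CMOV(MI.val, EAX)
        // On failure, compare_exchange refreshes Old with the value currently
        // in memory, just as a failed LCMPXCHG leaves it in EAX; the next
        // iteration then recomputes New against fresh contents.
        if (Mem.compare_exchange_weak(Old, New))  //   JNE loop on failure
          break;
      }
      return Old;                                 // dst = EAX
    }

    int main() {
      std::atomic<int> A(1);
      return (atomic_fetch_max(A, 2) == 1 && A.load() == 2) ? 0 : 1;
    }

The loop's correctness hinges on exactly the property noted in the comment: each failed exchange hands back the freshly observed value, so the operation is always recomputed against current memory contents.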
-MachineBasicBlock * -X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, - unsigned numArgs, bool memArg) const { - assert(Subtarget->hasSSE42() && - "Target must have SSE4.2 or AVX features enabled"); +static MachineBasicBlock *EmitPCMPSTRM(MachineInstr *MI, MachineBasicBlock *BB, + const TargetInstrInfo *TII) { + unsigned Opc; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::PCMPISTRM128REG: Opc = X86::PCMPISTRM128rr; break; + case X86::VPCMPISTRM128REG: Opc = X86::VPCMPISTRM128rr; break; + case X86::PCMPISTRM128MEM: Opc = X86::PCMPISTRM128rm; break; + case X86::VPCMPISTRM128MEM: Opc = X86::VPCMPISTRM128rm; break; + case X86::PCMPESTRM128REG: Opc = X86::PCMPESTRM128rr; break; + case X86::VPCMPESTRM128REG: Opc = X86::VPCMPESTRM128rr; break; + case X86::PCMPESTRM128MEM: Opc = X86::PCMPESTRM128rm; break; + case X86::VPCMPESTRM128MEM: Opc = X86::VPCMPESTRM128rm; break; + } DebugLoc dl = MI->getDebugLoc(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc)); + + unsigned NumArgs = MI->getNumOperands(); + for (unsigned i = 1; i < NumArgs; ++i) { + MachineOperand &Op = MI->getOperand(i); + if (!(Op.isReg() && Op.isImplicit())) + MIB.addOperand(Op); + } + if (MI->hasOneMemOperand()) + MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + + BuildMI(*BB, MI, dl, + TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) + .addReg(X86::XMM0); + + MI->eraseFromParent(); + return BB; +} + +// FIXME: Custom handling because TableGen doesn't support multiple implicit +// defs in an instruction pattern +static MachineBasicBlock *EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB, + const TargetInstrInfo *TII) { unsigned Opc; - if (!Subtarget->hasAVX()) { - if (memArg) - Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm; - else - Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr; - } else { - if (memArg) - Opc = numArgs == 3 ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm; - else - Opc = numArgs == 3 ? 
X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::PCMPISTRIREG: Opc = X86::PCMPISTRIrr; break; + case X86::VPCMPISTRIREG: Opc = X86::VPCMPISTRIrr; break; + case X86::PCMPISTRIMEM: Opc = X86::PCMPISTRIrm; break; + case X86::VPCMPISTRIMEM: Opc = X86::VPCMPISTRIrm; break; + case X86::PCMPESTRIREG: Opc = X86::PCMPESTRIrr; break; + case X86::VPCMPESTRIREG: Opc = X86::VPCMPESTRIrr; break; + case X86::PCMPESTRIMEM: Opc = X86::PCMPESTRIrm; break; + case X86::VPCMPESTRIMEM: Opc = X86::VPCMPESTRIrm; break; } + DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc)); - for (unsigned i = 0; i < numArgs; ++i) { - MachineOperand &Op = MI->getOperand(i+1); + + unsigned NumArgs = MI->getNumOperands(); // remove the results + for (unsigned i = 1; i < NumArgs; ++i) { + MachineOperand &Op = MI->getOperand(i); if (!(Op.isReg() && Op.isImplicit())) MIB.addOperand(Op); } + if (MI->hasOneMemOperand()) + MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) - .addReg(X86::XMM0); + .addReg(X86::ECX); MI->eraseFromParent(); return BB; } -MachineBasicBlock * -X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const { +static MachineBasicBlock * EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB, + const TargetInstrInfo *TII, + const X86Subtarget* Subtarget) { DebugLoc dl = MI->getDebugLoc(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); // Address into RAX/EAX, other two args into ECX, EDX. unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r; @@ -12767,6 +13601,203 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, } MachineBasicBlock * +X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + const BasicBlock *BB = MBB->getBasicBlock(); + MachineFunction::iterator I = MBB; + ++I; + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + unsigned DstReg; + unsigned MemOpndSlot = 0; + + unsigned CurOp = 0; + + DstReg = MI->getOperand(CurOp++).getReg(); + const TargetRegisterClass *RC = MRI.getRegClass(DstReg); + assert(RC->hasType(MVT::i32) && "Invalid destination!"); + unsigned mainDstReg = MRI.createVirtualRegister(RC); + unsigned restoreDstReg = MRI.createVirtualRegister(RC); + + MemOpndSlot = CurOp; + + MVT PVT = getPointerTy(); + assert((PVT == MVT::i64 || PVT == MVT::i32) && + "Invalid Pointer Size!"); + + // For v = setjmp(buf), we generate + // + // thisMBB: + // buf[LabelOffset] = restoreMBB + // SjLjSetup restoreMBB + // + // mainMBB: + // v_main = 0 + // + // sinkMBB: + // v = phi(main, restore) + // + // restoreMBB: + // v_restore = 1 + + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(I, mainMBB); + MF->insert(I, sinkMBB); + MF->push_back(restoreMBB); + + MachineInstrBuilder MIB; + + // Transfer the remainder of BB and its successor edges to sinkMBB. 
+ sinkMBB->splice(sinkMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), MBB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + + // thisMBB: + unsigned PtrStoreOpc = 0; + unsigned LabelReg = 0; + const int64_t LabelOffset = 1 * PVT.getStoreSize(); + Reloc::Model RM = getTargetMachine().getRelocationModel(); + bool UseImmLabel = (getTargetMachine().getCodeModel() == CodeModel::Small) && + (RM == Reloc::Static || RM == Reloc::DynamicNoPIC); + + // Prepare IP either in reg or imm. + if (!UseImmLabel) { + PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr; + const TargetRegisterClass *PtrRC = getRegClassFor(PVT); + LabelReg = MRI.createVirtualRegister(PtrRC); + if (Subtarget->is64Bit()) { + MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg) + .addReg(X86::RIP) + .addImm(0) + .addReg(0) + .addMBB(restoreMBB) + .addReg(0); + } else { + const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII); + MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg) + .addReg(XII->getGlobalBaseReg(MF)) + .addImm(0) + .addReg(0) + .addMBB(restoreMBB, Subtarget->ClassifyBlockAddressReference()) + .addReg(0); + } + } else + PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi; + // Store IP + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrStoreOpc)); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { + if (i == X86::AddrDisp) + MIB.addDisp(MI->getOperand(MemOpndSlot + i), LabelOffset); + else + MIB.addOperand(MI->getOperand(MemOpndSlot + i)); + } + if (!UseImmLabel) + MIB.addReg(LabelReg); + else + MIB.addMBB(restoreMBB); + MIB.setMemRefs(MMOBegin, MMOEnd); + // Setup + MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup)) + .addMBB(restoreMBB); + MIB.addRegMask(RegInfo->getNoPreservedMask()); + thisMBB->addSuccessor(mainMBB); + thisMBB->addSuccessor(restoreMBB); + + // mainMBB: + // EAX = 0 + BuildMI(mainMBB, DL, TII->get(X86::MOV32r0), mainDstReg); + mainMBB->addSuccessor(sinkMBB); + + // sinkMBB: + BuildMI(*sinkMBB, sinkMBB->begin(), DL, + TII->get(X86::PHI), DstReg) + .addReg(mainDstReg).addMBB(mainMBB) + .addReg(restoreDstReg).addMBB(restoreMBB); + + // restoreMBB: + BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1); + BuildMI(restoreMBB, DL, TII->get(X86::JMP_4)).addMBB(sinkMBB); + restoreMBB->addSuccessor(sinkMBB); + + MI->eraseFromParent(); + return sinkMBB; +} + +MachineBasicBlock * +X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + MVT PVT = getPointerTy(); + assert((PVT == MVT::i64 || PVT == MVT::i32) && + "Invalid Pointer Size!"); + + const TargetRegisterClass *RC = + (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass; + unsigned Tmp = MRI.createVirtualRegister(RC); + // Since FP is only updated here but NOT referenced, it's treated as GPR. + unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP; + unsigned SP = RegInfo->getStackRegister(); + + MachineInstrBuilder MIB; + + const int64_t LabelOffset = 1 * PVT.getStoreSize(); + const int64_t SPOffset = 2 * PVT.getStoreSize(); + + unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm; + unsigned IJmpOpc = (PVT == MVT::i64) ? 
X86::JMP64r : X86::JMP32r; + + // Reload FP + MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), FP); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) + MIB.addOperand(MI->getOperand(i)); + MIB.setMemRefs(MMOBegin, MMOEnd); + // Reload IP + MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { + if (i == X86::AddrDisp) + MIB.addDisp(MI->getOperand(i), LabelOffset); + else + MIB.addOperand(MI->getOperand(i)); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + // Reload SP + MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), SP); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { + if (i == X86::AddrDisp) + MIB.addDisp(MI->getOperand(i), SPOffset); + else + MIB.addOperand(MI->getOperand(i)); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + // Jump + BuildMI(*MBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp); + + MI->eraseFromParent(); + return MBB; +} + +MachineBasicBlock * X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { switch (MI->getOpcode()) { @@ -12895,198 +13926,101 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::PCMPESTRM128REG: case X86::VPCMPESTRM128REG: case X86::PCMPESTRM128MEM: - case X86::VPCMPESTRM128MEM: { - unsigned NumArgs; - bool MemArg; - switch (MI->getOpcode()) { - default: llvm_unreachable("illegal opcode!"); - case X86::PCMPISTRM128REG: - case X86::VPCMPISTRM128REG: - NumArgs = 3; MemArg = false; break; - case X86::PCMPISTRM128MEM: - case X86::VPCMPISTRM128MEM: - NumArgs = 3; MemArg = true; break; - case X86::PCMPESTRM128REG: - case X86::VPCMPESTRM128REG: - NumArgs = 5; MemArg = false; break; - case X86::PCMPESTRM128MEM: - case X86::VPCMPESTRM128MEM: - NumArgs = 5; MemArg = true; break; - } - return EmitPCMP(MI, BB, NumArgs, MemArg); - } - - // Thread synchronization. + case X86::VPCMPESTRM128MEM: + assert(Subtarget->hasSSE42() && + "Target must have SSE4.2 or AVX features enabled"); + return EmitPCMPSTRM(MI, BB, getTargetMachine().getInstrInfo()); + + // String/text processing lowering. + case X86::PCMPISTRIREG: + case X86::VPCMPISTRIREG: + case X86::PCMPISTRIMEM: + case X86::VPCMPISTRIMEM: + case X86::PCMPESTRIREG: + case X86::VPCMPESTRIREG: + case X86::PCMPESTRIMEM: + case X86::VPCMPESTRIMEM: + assert(Subtarget->hasSSE42() && + "Target must have SSE4.2 or AVX features enabled"); + return EmitPCMPSTRI(MI, BB, getTargetMachine().getInstrInfo()); + + // Thread synchronization. case X86::MONITOR: - return EmitMonitor(MI, BB); + return EmitMonitor(MI, BB, getTargetMachine().getInstrInfo(), Subtarget); - // Atomic Lowering. 
- case X86::ATOMAND32: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, - X86::AND32ri, X86::MOV32rm, - X86::LCMPXCHG32, - X86::NOT32r, X86::EAX, - &X86::GR32RegClass); - case X86::ATOMOR32: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr, - X86::OR32ri, X86::MOV32rm, - X86::LCMPXCHG32, - X86::NOT32r, X86::EAX, - &X86::GR32RegClass); - case X86::ATOMXOR32: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr, - X86::XOR32ri, X86::MOV32rm, - X86::LCMPXCHG32, - X86::NOT32r, X86::EAX, - &X86::GR32RegClass); - case X86::ATOMNAND32: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, - X86::AND32ri, X86::MOV32rm, - X86::LCMPXCHG32, - X86::NOT32r, X86::EAX, - &X86::GR32RegClass, true); - case X86::ATOMMIN32: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr); - case X86::ATOMMAX32: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr); - case X86::ATOMUMIN32: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr); - case X86::ATOMUMAX32: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr); + // xbegin + case X86::XBEGIN: + return EmitXBegin(MI, BB, getTargetMachine().getInstrInfo()); + // Atomic Lowering. + case X86::ATOMAND8: case X86::ATOMAND16: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, - X86::AND16ri, X86::MOV16rm, - X86::LCMPXCHG16, - X86::NOT16r, X86::AX, - &X86::GR16RegClass); + case X86::ATOMAND32: + case X86::ATOMAND64: + // Fall through + case X86::ATOMOR8: case X86::ATOMOR16: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr, - X86::OR16ri, X86::MOV16rm, - X86::LCMPXCHG16, - X86::NOT16r, X86::AX, - &X86::GR16RegClass); + case X86::ATOMOR32: + case X86::ATOMOR64: + // Fall through case X86::ATOMXOR16: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr, - X86::XOR16ri, X86::MOV16rm, - X86::LCMPXCHG16, - X86::NOT16r, X86::AX, - &X86::GR16RegClass); - case X86::ATOMNAND16: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, - X86::AND16ri, X86::MOV16rm, - X86::LCMPXCHG16, - X86::NOT16r, X86::AX, - &X86::GR16RegClass, true); - case X86::ATOMMIN16: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr); - case X86::ATOMMAX16: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr); - case X86::ATOMUMIN16: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr); - case X86::ATOMUMAX16: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr); - - case X86::ATOMAND8: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, - X86::AND8ri, X86::MOV8rm, - X86::LCMPXCHG8, - X86::NOT8r, X86::AL, - &X86::GR8RegClass); - case X86::ATOMOR8: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr, - X86::OR8ri, X86::MOV8rm, - X86::LCMPXCHG8, - X86::NOT8r, X86::AL, - &X86::GR8RegClass); case X86::ATOMXOR8: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr, - X86::XOR8ri, X86::MOV8rm, - X86::LCMPXCHG8, - X86::NOT8r, X86::AL, - &X86::GR8RegClass); - case X86::ATOMNAND8: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, - X86::AND8ri, X86::MOV8rm, - X86::LCMPXCHG8, - X86::NOT8r, X86::AL, - &X86::GR8RegClass, true); - // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way. - // This group is for 64-bit host. 
- case X86::ATOMAND64: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, - X86::AND64ri32, X86::MOV64rm, - X86::LCMPXCHG64, - X86::NOT64r, X86::RAX, - &X86::GR64RegClass); - case X86::ATOMOR64: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr, - X86::OR64ri32, X86::MOV64rm, - X86::LCMPXCHG64, - X86::NOT64r, X86::RAX, - &X86::GR64RegClass); + case X86::ATOMXOR32: case X86::ATOMXOR64: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr, - X86::XOR64ri32, X86::MOV64rm, - X86::LCMPXCHG64, - X86::NOT64r, X86::RAX, - &X86::GR64RegClass); + // Fall through + case X86::ATOMNAND8: + case X86::ATOMNAND16: + case X86::ATOMNAND32: case X86::ATOMNAND64: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, - X86::AND64ri32, X86::MOV64rm, - X86::LCMPXCHG64, - X86::NOT64r, X86::RAX, - &X86::GR64RegClass, true); - case X86::ATOMMIN64: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr); + // Fall through + case X86::ATOMMAX8: + case X86::ATOMMAX16: + case X86::ATOMMAX32: case X86::ATOMMAX64: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr); - case X86::ATOMUMIN64: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr); + // Fall through + case X86::ATOMMIN8: + case X86::ATOMMIN16: + case X86::ATOMMIN32: + case X86::ATOMMIN64: + // Fall through + case X86::ATOMUMAX8: + case X86::ATOMUMAX16: + case X86::ATOMUMAX32: case X86::ATOMUMAX64: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr); + // Fall through + case X86::ATOMUMIN8: + case X86::ATOMUMIN16: + case X86::ATOMUMIN32: + case X86::ATOMUMIN64: + return EmitAtomicLoadArith(MI, BB); // This group does 64-bit operations on a 32-bit host. case X86::ATOMAND6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::AND32rr, X86::AND32rr, - X86::AND32ri, X86::AND32ri, - false); case X86::ATOMOR6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::OR32rr, X86::OR32rr, - X86::OR32ri, X86::OR32ri, - false); case X86::ATOMXOR6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::XOR32rr, X86::XOR32rr, - X86::XOR32ri, X86::XOR32ri, - false); case X86::ATOMNAND6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::AND32rr, X86::AND32rr, - X86::AND32ri, X86::AND32ri, - true); case X86::ATOMADD6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::ADD32rr, X86::ADC32rr, - X86::ADD32ri, X86::ADC32ri, - false); case X86::ATOMSUB6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::SUB32rr, X86::SBB32rr, - X86::SUB32ri, X86::SBB32ri, - false); + case X86::ATOMMAX6432: + case X86::ATOMMIN6432: + case X86::ATOMUMAX6432: + case X86::ATOMUMIN6432: case X86::ATOMSWAP6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::MOV32rr, X86::MOV32rr, - X86::MOV32ri, X86::MOV32ri, - false); + return EmitAtomicLoadArith6432(MI, BB); + case X86::VASTART_SAVE_XMM_REGS: return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB); case X86::VAARG_64: return EmitVAARG64WithCustomInserter(MI, BB); + + case X86::EH_SjLj_SetJmp32: + case X86::EH_SjLj_SetJmp64: + return emitEHSjLjSetJmp(MI, BB); + + case X86::EH_SjLj_LongJmp32: + case X86::EH_SjLj_LongJmp64: + return emitEHSjLjLongJmp(MI, BB); } } @@ -13333,12 +14267,12 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, } -/// DCI, PerformTruncateCombine - Converts truncate operation to +/// PerformTruncateCombine - Converts truncate operation to /// a sequence of vector shuffle operations. 
/// It is possible when we truncate 256-bit vector to 128-bit vector - -SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, - DAGCombinerInfo &DCI) const { +static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { if (!DCI.isBeforeLegalizeOps()) return SDValue(); @@ -13530,7 +14464,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, // alignment is valid. unsigned Align = LN0->getAlignment(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - unsigned NewAlign = TLI.getTargetData()-> + unsigned NewAlign = TLI.getDataLayout()-> getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT)) @@ -13561,6 +14495,14 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, return NewOp; SDValue InputVector = N->getOperand(0); + // Detect whether we are trying to convert from mmx to i32 and the bitcast + // from mmx to v2i32 has a single usage. + if (InputVector.getNode()->getOpcode() == llvm::ISD::BITCAST && + InputVector.getNode()->getOperand(0).getValueType() == MVT::x86mmx && + InputVector.hasOneUse() && N->getValueType(0) == MVT::i32) + return DAG.getNode(X86ISD::MMX_MOVD2W, InputVector.getDebugLoc(), + N->getValueType(0), + InputVector.getNode()->getOperand(0)); // Only operate on vectors of 4 elements, where the alternative shuffling // gets to be more expensive. @@ -13961,7 +14903,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // // where Op could be BRCOND or CMOV. // -static SDValue BoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { +static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { // Quit if not CMP and SUB with its value result used. if (Cmp.getOpcode() != X86ISD::CMP && (Cmp.getOpcode() != X86ISD::SUB || Cmp.getNode()->hasAnyUseOfValue(0))) @@ -13997,40 +14939,55 @@ static SDValue BoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { if (SetCC.getOpcode() == ISD::ZERO_EXTEND) SetCC = SetCC.getOperand(0); - // Quit if not SETCC. - // FIXME: So far we only handle the boolean value generated from SETCC. If - // there is other ways to generate boolean values, we need handle them here - // as well. - if (SetCC.getOpcode() != X86ISD::SETCC) - return SDValue(); - - // Set the condition code or opposite one if necessary. - CC = X86::CondCode(SetCC.getConstantOperandVal(0)); - if (needOppositeCond) - CC = X86::GetOppositeBranchCondition(CC); - - return SetCC.getOperand(1); -} - -static bool IsValidFCMOVCondition(X86::CondCode CC) { - switch (CC) { - default: - return false; - case X86::COND_B: - case X86::COND_BE: - case X86::COND_E: - case X86::COND_P: - case X86::COND_AE: - case X86::COND_A: - case X86::COND_NE: - case X86::COND_NP: - return true; + switch (SetCC.getOpcode()) { + case X86ISD::SETCC: + // Set the condition code or opposite one if necessary. + CC = X86::CondCode(SetCC.getConstantOperandVal(0)); + if (needOppositeCond) + CC = X86::GetOppositeBranchCondition(CC); + return SetCC.getOperand(1); + case X86ISD::CMOV: { + // Check whether false/true value has canonical one, i.e. 0 or 1. + ConstantSDNode *FVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(0)); + ConstantSDNode *TVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(1)); + // Quit if true value is not a constant. + if (!TVal) + return SDValue(); + // Quit if false value is not a constant. 
+      if (!FVal) {
+        // A special case for rdrand, where 0 is set if false cond is found.
+        SDValue Op = SetCC.getOperand(0);
+        if (Op.getOpcode() != X86ISD::RDRAND)
+          return SDValue();
+      }
+      // Quit if false value is not the constant 0 or 1.
+      bool FValIsFalse = true;
+      if (FVal && FVal->getZExtValue() != 0) {
+        if (FVal->getZExtValue() != 1)
+          return SDValue();
+        // If FVal is 1, opposite cond is needed.
+        needOppositeCond = !needOppositeCond;
+        FValIsFalse = false;
+      }
+      // Quit if TVal is not the constant opposite of FVal.
+      if (FValIsFalse && TVal->getZExtValue() != 1)
+        return SDValue();
+      if (!FValIsFalse && TVal->getZExtValue() != 0)
+        return SDValue();
+      CC = X86::CondCode(SetCC.getConstantOperandVal(2));
+      if (needOppositeCond)
+        CC = X86::GetOppositeBranchCondition(CC);
+      return SetCC.getOperand(3);
+  }
   }
+
+  return SDValue();
 }
 
 /// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
 static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
-                                  TargetLowering::DAGCombinerInfo &DCI) {
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  const X86Subtarget *Subtarget) {
   DebugLoc DL = N->getDebugLoc();
 
   // If the flag operand isn't dead, don't touch this CMOV.
@@ -14055,10 +15012,10 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
 
   SDValue Flags;
 
-  Flags = BoolTestSetCCCombine(Cond, CC);
+  Flags = checkBoolTestSetCCCombine(Cond, CC);
   if (Flags.getNode() &&
       // Extra check as FCMOV only supports a subset of X86 cond.
-      (FalseOp.getValueType() != MVT::f80 || IsValidFCMOVCondition(CC))) {
+      (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) {
     SDValue Ops[] = { FalseOp, TrueOp, DAG.getConstant(CC, MVT::i8),
                       Flags };
     return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(),
@@ -14075,6 +15032,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
       if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
         CC = X86::GetOppositeBranchCondition(CC);
         std::swap(TrueC, FalseC);
+        std::swap(TrueOp, FalseOp);
       }
 
       // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
@@ -14157,6 +15115,46 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
       }
     }
   }
+
+  // Handle these cases:
+  //   (select (x != c), e, c) -> (select (x != c), e, x),
+  //   (select (x == c), c, e) -> (select (x == c), x, e)
+  // where c is an integer constant, and the "select" is the combination
+  // of CMOV and CMP.
+  //
+  // The rationale for this change is that the conditional-move from a constant
+  // needs two instructions; a conditional-move from a register needs only one.
+  //
+  // CAVEAT: Replacing a constant with a symbolic value may obscure some
+  //  instruction-combining opportunities. This opt needs to be postponed as
+  //  late as possible.
+  //
+  if (!DCI.isBeforeLegalize() && !DCI.isBeforeLegalizeOps()) {
+    // The DCI.xxxx conditions are provided to postpone the optimization as
+    // late as possible.
+
+ ConstantSDNode *CmpAgainst = 0;
+ if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) &&
+ (CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
+ dyn_cast<ConstantSDNode>(Cond.getOperand(0)) == 0) {
+
+ if (CC == X86::COND_NE &&
+ CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) {
+ CC = X86::GetOppositeBranchCondition(CC);
+ std::swap(TrueOp, FalseOp);
+ }
+
+ if (CC == X86::COND_E &&
+ CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) {
+ SDValue Ops[] = { FalseOp, Cond.getOperand(0),
+ DAG.getConstant(CC, MVT::i8), Cond };
+ return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops,
+ array_lengthof(Ops));
+ }
+ }
+ }
+
return SDValue();
}

@@ -14813,11 +15811,11 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
ISD::LoadExtType Ext = Ld->getExtensionType();

// If this is a vector EXT Load then attempt to optimize it using a
- // shuffle. We need SSE4 for the shuffles.
+ // shuffle. We need SSSE3 for the shuffles.
// TODO: It is possible to support ZExt by zeroing the undef values
// during the shuffle phase or after the shuffle.
if (RegVT.isVector() && RegVT.isInteger() &&
- Ext == ISD::EXTLOAD && Subtarget->hasSSE41()) {
+ Ext == ISD::EXTLOAD && Subtarget->hasSSSE3()) {
assert(MemVT != RegVT && "Cannot extend to the same type");
assert(MemVT.isVector() && "Must load a vector from memory");
@@ -15043,7 +16041,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
return SDValue();

const Function *F = DAG.getMachineFunction().getFunction();
- bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+ bool NoImplicitFloatOps = F->getFnAttributes().
+ hasAttribute(Attributes::NoImplicitFloat);
bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps &&
Subtarget->hasSSE2();
if ((VT.isVector() ||
@@ -15315,6 +16314,29 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}

+/// PerformFMinFMaxCombine - Do target-specific dag combines on X86ISD::FMIN and
+/// X86ISD::FMAX nodes.
+static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);
+
+ // Only perform optimizations if UnsafeMath is used.
+ if (!DAG.getTarget().Options.UnsafeFPMath)
+ return SDValue();
+
+ // If we run in unsafe-math mode, then convert the FMAX and FMIN nodes
+ // into FMINC and FMAXC, which are commutative operations.
+ unsigned NewOp = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unknown opcode");
+ case X86ISD::FMIN: NewOp = X86ISD::FMINC; break;
+ case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break;
+ }
+
+ return DAG.getNode(NewOp, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(0), N->getOperand(1));
+}
+
+
/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
// FAND(0.0, x) -> 0.0
@@ -15420,8 +16442,13 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);

+ // Let legalize expand this if it isn't a legal type yet.
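// ---- Sketch (editorial aside, not part of the patch) of why FMIN/FMAX may
// only be turned into the commutative FMINC/FMAXC under unsafe-math: SSE
// MINSS/MINPS return the second operand whenever the compare is false,
// including when either operand is a NaN, so operand order is observable.
#include <cassert>
#include <cmath>

static float sseMin(float A, float B) { return A < B ? A : B; } // MINSS model

int main() {
  float NaN = std::nanf("");
  assert(sseMin(NaN, 1.0f) == 1.0f);     // NaN < 1.0f is false -> returns B
  assert(std::isnan(sseMin(1.0f, NaN))); // 1.0f < NaN is false -> returns B
  return 0;
}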
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + EVT ScalarVT = VT.getScalarType(); - if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasFMA()) + if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || + (!Subtarget->hasFMA() && !Subtarget->hasFMA4())) return SDValue(); SDValue A = N->getOperand(0); @@ -15443,9 +16470,10 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode; if (!NegMul) - Opcode = (!NegC)? X86ISD::FMADD : X86ISD::FMSUB; + Opcode = (!NegC) ? X86ISD::FMADD : X86ISD::FMSUB; else - Opcode = (!NegC)? X86ISD::FNMADD : X86ISD::FNMSUB; + Opcode = (!NegC) ? X86ISD::FNMADD : X86ISD::FNMSUB; + return DAG.getNode(Opcode, dl, VT, A, B, C); } @@ -15542,24 +16570,51 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +// Helper function of PerformSETCCCombine. It is to materialize "setb reg" +// as "sbb reg,reg", since it can be extended without zext and produces +// an all-ones bit which is more useful than 0/1 in some cases. +static SDValue MaterializeSETB(DebugLoc DL, SDValue EFLAGS, SelectionDAG &DAG) { + return DAG.getNode(ISD::AND, DL, MVT::i8, + DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, + DAG.getConstant(X86::COND_B, MVT::i8), EFLAGS), + DAG.getConstant(1, MVT::i8)); +} + // Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT -static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { DebugLoc DL = N->getDebugLoc(); X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0)); SDValue EFLAGS = N->getOperand(1); + if (CC == X86::COND_A) { + // Try to convert COND_A into COND_B in an attempt to facilitate + // materializing "setb reg". + // + // Do not flip "e > c", where "c" is a constant, because Cmp instruction + // cannot take an immediate as its first operand. + // + if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() && + EFLAGS.getValueType().isInteger() && + !isa<ConstantSDNode>(EFLAGS.getOperand(1))) { + SDValue NewSub = DAG.getNode(X86ISD::SUB, EFLAGS.getDebugLoc(), + EFLAGS.getNode()->getVTList(), + EFLAGS.getOperand(1), EFLAGS.getOperand(0)); + SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); + return MaterializeSETB(DL, NewEFLAGS, DAG); + } + } + // Materialize "setb reg" as "sbb reg,reg", since it can be extended without // a zext and produces an all-ones bit which is more useful than 0/1 in some // cases. 
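// ---- Standalone model (editorial aside, not part of the patch) of the
// "sbb reg,reg" trick used by MaterializeSETB above: subtracting a register
// from itself with borrow yields 0 - CF, i.e. 0 or all-ones, and masking
// with 1 recovers the 0/1 result of setb.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t CF = 0; CF <= 1; ++CF) {
    uint32_t Sbb = 0u - CF;         // sbb %eax,%eax: 0 or 0xFFFFFFFF
    assert((Sbb & 1) == CF);        // and $1,%eax recovers the setb value
    assert(Sbb == (CF ? ~0u : 0u)); // the all-ones form widens without a zext
  }
  return 0;
}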
if (CC == X86::COND_B) - return DAG.getNode(ISD::AND, DL, MVT::i8, - DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, - DAG.getConstant(CC, MVT::i8), EFLAGS), - DAG.getConstant(1, MVT::i8)); + return MaterializeSETB(DL, EFLAGS, DAG); SDValue Flags; - Flags = BoolTestSetCCCombine(EFLAGS, CC); + Flags = checkBoolTestSetCCCombine(EFLAGS, CC); if (Flags.getNode()) { SDValue Cond = DAG.getConstant(CC, MVT::i8); return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags); @@ -15581,7 +16636,7 @@ static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG, SDValue Flags; - Flags = BoolTestSetCCCombine(EFLAGS, CC); + Flags = checkBoolTestSetCCCombine(EFLAGS, CC); if (Flags.getNode()) { SDValue Cond = DAG.getConstant(CC, MVT::i8); return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond, @@ -15591,23 +16646,6 @@ static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue PerformUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG) { - SDValue Op0 = N->getOperand(0); - EVT InVT = Op0->getValueType(0); - - // UINT_TO_FP(v4i8) -> SINT_TO_FP(ZEXT(v4i8 to v4i32)) - if (InVT == MVT::v8i8 || InVT == MVT::v4i8) { - DebugLoc dl = N->getDebugLoc(); - MVT DstVT = InVT == MVT::v4i8 ? MVT::v4i32 : MVT::v8i32; - SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0); - // Notice that we use SINT_TO_FP because we know that the high bits - // are zero and SINT_TO_FP is better supported by the hardware. - return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P); - } - - return SDValue(); -} - static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86TargetLowering *XTLI) { SDValue Op0 = N->getOperand(0); @@ -15639,20 +16677,6 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue PerformFP_TO_SINTCombine(SDNode *N, SelectionDAG &DAG) { - EVT VT = N->getValueType(0); - - // v4i8 = FP_TO_SINT() -> v4i8 = TRUNCATE (V4i32 = FP_TO_SINT() - if (VT == MVT::v8i8 || VT == MVT::v4i8) { - DebugLoc dl = N->getDebugLoc(); - MVT DstVT = VT == MVT::v4i8 ? 
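// ---- Quick check (editorial aside, not part of the patch) of the identity
// behind the UINT_TO_FP combine removed above: zero-extended i8 lanes are
// non-negative as i32, so a signed int-to-float conversion produces the same
// result as the unsigned one, and SINT_TO_FP is the better-supported node.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t V = 0; V <= 255; ++V) {
    float FromUnsigned = static_cast<float>(static_cast<uint8_t>(V));
    float FromSigned = static_cast<float>(static_cast<int32_t>(V)); // zext, then sitofp
    assert(FromUnsigned == FromSigned);
  }
  return 0;
}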
MVT::v4i32 : MVT::v8i32;
- SDValue I = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, N->getOperand(0));
- return DAG.getNode(ISD::TRUNCATE, dl, VT, I);
- }
-
- return SDValue();
-}
-
// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
X86TargetLowering::DAGCombinerInfo &DCI) {
@@ -15767,6 +16791,21 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
return OptimizeConditionalInDecrement(N, DAG);
}

+/// performVZEXTCombine - Do target-specific dag combines on X86ISD::VZEXT nodes.
+static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ // (vzext (bitcast (vzext x))) -> (vzext x)
+ SDValue In = N->getOperand(0);
+ while (In.getOpcode() == ISD::BITCAST)
+ In = In.getOperand(0);
+
+ if (In.getOpcode() != X86ISD::VZEXT)
+ return SDValue();
+
+ return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0),
+ In.getOperand(0));
+}
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -15776,7 +16815,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI);
case ISD::VSELECT:
case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget);
- case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
+ case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget);
case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);
case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
@@ -15789,23 +16828,24 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
- case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG);
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
- case ISD::FP_TO_SINT: return PerformFP_TO_SINTCombine(N, DAG);
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
+ case X86ISD::FMIN:
+ case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
- case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI);
+ case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI, Subtarget);
case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
- case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
+ case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
+ case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::PALIGN:
case X86ISD::UNPCKH:
@@ -16233,7 +17273,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return;
case 'K':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- if ((int8_t)C->getSExtValue() == C->getSExtValue()) {
+ if
(isInt<8>(C->getSExtValue())) { Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); break; } @@ -16558,3 +17598,207 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return Res; } + +//===----------------------------------------------------------------------===// +// +// X86 cost model. +// +//===----------------------------------------------------------------------===// + +struct X86CostTblEntry { + int ISD; + MVT Type; + unsigned Cost; +}; + +static int +FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) { + for (unsigned int i = 0; i < len; ++i) + if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty) + return i; + + // Could not find an entry. + return -1; +} + +struct X86TypeConversionCostTblEntry { + int ISD; + MVT Dst; + MVT Src; + unsigned Cost; +}; + +static int +FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len, + int ISD, MVT Dst, MVT Src) { + for (unsigned int i = 0; i < len; ++i) + if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst) + return i; + + // Could not find an entry. + return -1; +} + +unsigned +X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode, + Type *Ty) const { + // Legalize the type. + std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Ty); + + int ISD = InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>(); + + static const X86CostTblEntry AVX1CostTable[] = { + // We don't have to scalarize unsupported ops. We can issue two half-sized + // operations and we only need to extract the upper YMM half. + // Two ops + 1 extract + 1 insert = 4. + { ISD::MUL, MVT::v8i32, 4 }, + { ISD::SUB, MVT::v8i32, 4 }, + { ISD::ADD, MVT::v8i32, 4 }, + { ISD::MUL, MVT::v4i64, 4 }, + { ISD::SUB, MVT::v4i64, 4 }, + { ISD::ADD, MVT::v4i64, 4 }, + }; + + // Look for AVX1 lowering tricks. + if (ST.hasAVX()) { + int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD, + LT.second); + if (Idx != -1) + return LT.first * AVX1CostTable[Idx].Cost; + } + // Fallback to the default implementation. + return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty); +} + +unsigned +X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + assert(Val->isVectorTy() && "This must be a vector type"); + + if (Index != -1U) { + // Legalize the type. + std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Val); + + // This type is legalized to a scalar type. + if (!LT.second.isVector()) + return 0; + + // The type may be split. Normalize the index to the new type. + unsigned Width = LT.second.getVectorNumElements(); + Index = Index % Width; + + // Floating point scalars are already located in index #0. + if (Val->getScalarType()->isFloatingPointTy() && Index == 0) + return 0; + } + + return VectorTargetTransformImpl::getVectorInstrCost(Opcode, Val, Index); +} + +unsigned X86VectorTargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, + Type *ValTy, + Type *CondTy) const { + // Legalize the type. 
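// ---- Self-contained sketch (editorial aside, not part of the patch) of the
// cost-table pattern used above: a flat array keyed by (ISD opcode, type),
// searched linearly, with the per-operation cost scaled by the number of
// legal-type pieces the original type splits into (LT.first). The enum
// values below are placeholders, not real LLVM constants.
#include <cassert>

struct CostTblEntry { int ISD; int Type; unsigned Cost; };

static int findInTable(const CostTblEntry *Tbl, unsigned Len, int ISD, int Ty) {
  for (unsigned I = 0; I < Len; ++I)
    if (Tbl[I].ISD == ISD && Tbl[I].Type == Ty)
      return static_cast<int>(I);
  return -1; // no entry: the caller falls back to the default cost
}

int main() {
  enum { ISD_MUL = 1, MVT_v8i32 = 7 };
  static const CostTblEntry AVX1Tbl[] = { { ISD_MUL, MVT_v8i32, 4 } };
  unsigned NumParts = 1; // stands in for LT.first after legalization
  int Idx = findInTable(AVX1Tbl, 1, ISD_MUL, MVT_v8i32);
  assert(Idx != -1 && NumParts * AVX1Tbl[Idx].Cost == 4);
  return 0;
}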
+ std::pair<unsigned, MVT> LT = getTypeLegalizationCost(ValTy); + + MVT MTy = LT.second; + + int ISD = InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + const X86Subtarget &ST = + TLI->getTargetMachine().getSubtarget<X86Subtarget>(); + + static const X86CostTblEntry SSE42CostTbl[] = { + { ISD::SETCC, MVT::v2f64, 1 }, + { ISD::SETCC, MVT::v4f32, 1 }, + { ISD::SETCC, MVT::v2i64, 1 }, + { ISD::SETCC, MVT::v4i32, 1 }, + { ISD::SETCC, MVT::v8i16, 1 }, + { ISD::SETCC, MVT::v16i8, 1 }, + }; + + static const X86CostTblEntry AVX1CostTbl[] = { + { ISD::SETCC, MVT::v4f64, 1 }, + { ISD::SETCC, MVT::v8f32, 1 }, + // AVX1 does not support 8-wide integer compare. + { ISD::SETCC, MVT::v4i64, 4 }, + { ISD::SETCC, MVT::v8i32, 4 }, + { ISD::SETCC, MVT::v16i16, 4 }, + { ISD::SETCC, MVT::v32i8, 4 }, + }; + + static const X86CostTblEntry AVX2CostTbl[] = { + { ISD::SETCC, MVT::v4i64, 1 }, + { ISD::SETCC, MVT::v8i32, 1 }, + { ISD::SETCC, MVT::v16i16, 1 }, + { ISD::SETCC, MVT::v32i8, 1 }, + }; + + if (ST.hasSSE42()) { + int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); + if (Idx != -1) + return LT.first * SSE42CostTbl[Idx].Cost; + } + + if (ST.hasAVX()) { + int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy); + if (Idx != -1) + return LT.first * AVX1CostTbl[Idx].Cost; + } + + if (ST.hasAVX2()) { + int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); + if (Idx != -1) + return LT.first * AVX2CostTbl[Idx].Cost; + } + + return VectorTargetTransformImpl::getCmpSelInstrCost(Opcode, ValTy, CondTy); +} + +unsigned X86VectorTargetTransformInfo::getCastInstrCost(unsigned Opcode, + Type *Dst, + Type *Src) const { + int ISD = InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + EVT SrcTy = TLI->getValueType(Src); + EVT DstTy = TLI->getValueType(Dst); + + if (!SrcTy.isSimple() || !DstTy.isSimple()) + return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src); + + const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>(); + + static const X86TypeConversionCostTblEntry AVXConversionTbl[] = { + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, + { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, + { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, + }; + + if (ST.hasAVX()) { + int Idx = FindInConvertTable(AVXConversionTbl, + array_lengthof(AVXConversionTbl), + ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); + if (Idx != -1) + return AVXConversionTbl[Idx].Cost; + } + + return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src); +} + diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h index 896d067..465c603 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h @@ -19,6 +19,7 @@ #include "X86RegisterInfo.h" #include "X86MachineFunctionInfo.h" #include "llvm/Target/TargetLowering.h" +#include 
"llvm/Target/TargetTransformImpl.h" #include "llvm/Target/TargetOptions.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -142,6 +143,10 @@ namespace llvm { /// mnemonic, so do I; blame Intel. MOVDQ2Q, + /// MMX_MOVD2W - Copies a 32-bit value from the low word of a MMX + /// vector to a GPR. + MMX_MOVD2W, + /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to /// i32, corresponds to X86::PEXTRB. PEXTRB, @@ -195,6 +200,9 @@ namespace llvm { /// FMAX, FMIN, + /// FMAXC, FMINC - Commutative FMIN and FMAX. + FMAXC, FMINC, + /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal /// approximation. Note that these typically require refinement /// in order to obtain suitable precision. @@ -214,6 +222,12 @@ namespace llvm { // EH_RETURN - Exception Handling helpers. EH_RETURN, + // EH_SJLJ_SETJMP - SjLj exception handling setjmp. + EH_SJLJ_SETJMP, + + // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. + EH_SJLJ_LONGJMP, + /// TC_RETURN - Tail call return. /// operand #0 chain /// operand #1 callee (register or absolute) @@ -227,9 +241,18 @@ namespace llvm { // VSEXT_MOVL - Vector move low and sign extend. VSEXT_MOVL, + // VZEXT - Vector integer zero-extend. + VZEXT, + + // VSEXT - Vector integer signed-extend. + VSEXT, + // VFPEXT - Vector FP extend. VFPEXT, + // VFPROUND - Vector FP round. + VFPROUND, + // VSHL, VSRL - 128-bit vector logical left / right shift VSHLDQ, VSRLDQ, @@ -345,6 +368,10 @@ namespace llvm { ATOMXOR64_DAG, ATOMAND64_DAG, ATOMNAND64_DAG, + ATOMMAX64_DAG, + ATOMMIN64_DAG, + ATOMUMAX64_DAG, + ATOMUMIN64_DAG, ATOMSWAP64_DAG, // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap. @@ -458,10 +485,6 @@ namespace llvm { getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const; - /// getStackPtrReg - Return the stack pointer register we are using: either - /// ESP or RSP. - unsigned getStackPtrReg() const { return X86StackPtr; } - /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. For X86, aggregates /// that contains are placed at 16-byte boundaries while the rest are at @@ -694,10 +717,7 @@ namespace llvm { /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; const X86RegisterInfo *RegInfo; - const TargetData *TD; - - /// X86StackPtr - X86 physical register used as stack ptr. - unsigned X86StackPtr; + const DataLayout *TD; /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 /// floating point ops. 
@@ -741,6 +761,7 @@ namespace llvm { bool isVarArg, bool isCalleeStructRet, bool isCallerStructRet, + Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -760,15 +781,11 @@ namespace llvm { SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, @@ -782,12 +799,15 @@ namespace llvm { SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerToBT(SDValue And, ISD::CondCode CC, DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; @@ -799,39 +819,26 @@ namespace llvm { SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSUB(SDValue Op, 
SelectionDAG &DAG) const; - SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; - SDValue PerformTruncateCombine(SDNode* N, SelectionDAG &DAG, DAGCombinerInfo &DCI) const; - // Utility functions to help LowerVECTOR_SHUFFLE - SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const; + // Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR + SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const; SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const; + SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const; + SDValue lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, @@ -864,51 +871,17 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const; - /// Utility function to emit string processing sse4.2 instructions - /// that return in xmm0. - /// This takes the instruction to expand, the associated machine basic - /// block, the number of args, and whether or not the second arg is - /// in memory or not. - MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB, - unsigned argNum, bool inMem) const; - - /// Utility functions to emit monitor and mwait instructions. These - /// need to make sure that the arguments to the intrinsic are in the - /// correct registers. - MachineBasicBlock *EmitMonitor(MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const; - - /// Utility function to emit atomic bitwise operations (and, or, xor). - /// It takes the bitwise instruction to expand, the associated machine basic - /// block, and the associated X86 opcodes for reg/reg and reg/imm. - MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter( - MachineInstr *BInstr, - MachineBasicBlock *BB, - unsigned regOpc, - unsigned immOpc, - unsigned loadOpc, - unsigned cxchgOpc, - unsigned notOpc, - unsigned EAXreg, - const TargetRegisterClass *RC, - bool Invert = false) const; - - MachineBasicBlock *EmitAtomicBit6432WithCustomInserter( - MachineInstr *BInstr, - MachineBasicBlock *BB, - unsigned regOpcL, - unsigned regOpcH, - unsigned immOpcL, - unsigned immOpcH, - bool Invert = false) const; - - /// Utility function to emit atomic min and max. It takes the min/max - /// instruction to expand, the associated basic block, and the associated - /// cmov opcode for moving the min or max value. - MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr, - MachineBasicBlock *BB, - unsigned cmovOpc) const; + /// Utility function to emit atomic-load-arith operations (and, or, xor, + /// nand, max, min, umax, umin). It takes the corresponding instruction to + /// expand, the associated machine basic block, and the associated X86 + /// opcodes for reg/reg. 
+ MachineBasicBlock *EmitAtomicLoadArith(MachineInstr *MI, + MachineBasicBlock *MBB) const; + + /// Utility function to emit atomic-load-arith operations (and, or, xor, + /// nand, add, sub, swap) for 64-bit operands on 32-bit target. + MachineBasicBlock *EmitAtomicLoadArith6432(MachineInstr *MI, + MachineBasicBlock *MBB) const; // Utility function to emit the low-level va_arg code for X86-64. MachineBasicBlock *EmitVAARG64WithCustomInserter( @@ -936,6 +909,12 @@ namespace llvm { MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const; + + MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const; + /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent, for use with the given x86 condition code. SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const; @@ -953,6 +932,23 @@ namespace llvm { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); } + + class X86VectorTargetTransformInfo : public VectorTargetTransformImpl { + public: + explicit X86VectorTargetTransformInfo(const TargetLowering *TL) : + VectorTargetTransformImpl(TL) {} + + virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; + + virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const; + + unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const; + + virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const; + }; } #endif // X86ISELLOWERING_H diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td index d78264f..9e6f279 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td @@ -165,6 +165,33 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), } +let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, + usesCustomInserter = 1 in { + def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf), + "#EH_SJLJ_SETJMP32", + [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>, + Requires<[In32BitMode]>; + def EH_SjLj_SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf), + "#EH_SJLJ_SETJMP64", + [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>, + Requires<[In64BitMode]>; + let isTerminator = 1 in { + def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf), + "#EH_SJLJ_LONGJMP32", + [(X86eh_sjlj_longjmp addr:$buf)]>, + Requires<[In32BitMode]>; + def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf), + "#EH_SJLJ_LONGJMP64", + [(X86eh_sjlj_longjmp addr:$buf)]>, + Requires<[In64BitMode]>; + } +} + +let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in { + def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst), + "#EH_SjLj_Setup\t$dst", []>; +} + //===----------------------------------------------------------------------===// // Pseudo instructions used by segmented stacks. // @@ -230,25 +257,18 @@ def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), IIC_ALU_NONMEM>; // Use sbb to materialize carry bit. -let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in { +let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1 in { // FIXME: These are pseudo ops that should be replaced with Pat<> patterns. // However, Pat<> can't replicate the destination reg into the inputs of the // result. 
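// ---- Sketch (editorial aside, not part of the patch) of the load/compute/
// cmpxchg retry loop that a custom inserter like EmitAtomicLoadArith
// conceptually expands an atomic pseudo into, shown here for nand, which has
// no single x86 instruction.
#include <atomic>
#include <cassert>
#include <cstdint>

static uint32_t atomicNand(std::atomic<uint32_t> &Mem, uint32_t Val) {
  uint32_t Old = Mem.load();
  // Retry until no other thread raced the update (lock cmpxchg loop).
  while (!Mem.compare_exchange_weak(Old, ~(Old & Val)))
    ;
  return Old; // the pseudo's result is the previous memory value
}

int main() {
  std::atomic<uint32_t> M(0xF0F0F0F0u);
  assert(atomicNand(M, 0xFFu) == 0xF0F0F0F0u);
  assert(M.load() == ~(0xF0F0F0F0u & 0xFFu));
  return 0;
}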
-// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces -// X86CodeEmitter. -def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "", - [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))], - IIC_ALU_NONMEM>; -def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "", - [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))], - IIC_ALU_NONMEM>, - OpSize; -def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "", - [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))], - IIC_ALU_NONMEM>; -def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "", - [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))], - IIC_ALU_NONMEM>; +def SETB_C8r : I<0, Pseudo, (outs GR8:$dst), (ins), "", + [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; +def SETB_C16r : I<0, Pseudo, (outs GR16:$dst), (ins), "", + [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; +def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "", + [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; +def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "", + [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; } // isCodeGenOnly @@ -489,130 +509,74 @@ def CMOV_RFP80 : I<0, Pseudo, // Atomic Instruction Pseudo Instructions //===----------------------------------------------------------------------===// -// Atomic exchange, and, or, xor -let Constraints = "$val = $dst", Defs = [EFLAGS], - usesCustomInserter = 1 in { - -def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), - "#ATOMAND8 PSEUDO!", - [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>; -def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), - "#ATOMOR8 PSEUDO!", - [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>; -def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), - "#ATOMXOR8 PSEUDO!", - [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>; -def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), - "#ATOMNAND8 PSEUDO!", - [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>; - -def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMAND16 PSEUDO!", - [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>; -def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMOR16 PSEUDO!", - [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>; -def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMXOR16 PSEUDO!", - [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>; -def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMNAND16 PSEUDO!", - [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>; -def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val), - "#ATOMMIN16 PSEUDO!", - [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>; -def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMMAX16 PSEUDO!", - [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>; -def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMUMIN16 PSEUDO!", - [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>; -def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMUMAX16 PSEUDO!", - [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>; - - -def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMAND32 PSEUDO!", - [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, 
GR32:$val))]>; -def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMOR32 PSEUDO!", - [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>; -def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMXOR32 PSEUDO!", - [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>; -def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMNAND32 PSEUDO!", - [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>; -def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val), - "#ATOMMIN32 PSEUDO!", - [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>; -def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMMAX32 PSEUDO!", - [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>; -def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMUMIN32 PSEUDO!", - [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>; -def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMUMAX32 PSEUDO!", - [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>; - - - -def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMAND64 PSEUDO!", - [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>; -def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMOR64 PSEUDO!", - [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>; -def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMXOR64 PSEUDO!", - [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>; -def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMNAND64 PSEUDO!", - [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>; -def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val), - "#ATOMMIN64 PSEUDO!", - [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>; -def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMMAX64 PSEUDO!", - [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>; -def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMUMIN64 PSEUDO!", - [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>; -def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMUMAX64 PSEUDO!", - [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>; +// Pseudo atomic instructions + +multiclass PSEUDO_ATOMIC_LOAD_BINOP<string mnemonic> { + let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in { + def #NAME#8 : I<0, Pseudo, (outs GR8:$dst), + (ins i8mem:$ptr, GR8:$val), + !strconcat(mnemonic, "8 PSEUDO!"), []>; + def #NAME#16 : I<0, Pseudo,(outs GR16:$dst), + (ins i16mem:$ptr, GR16:$val), + !strconcat(mnemonic, "16 PSEUDO!"), []>; + def #NAME#32 : I<0, Pseudo, (outs GR32:$dst), + (ins i32mem:$ptr, GR32:$val), + !strconcat(mnemonic, "32 PSEUDO!"), []>; + def #NAME#64 : I<0, Pseudo, (outs GR64:$dst), + (ins i64mem:$ptr, GR64:$val), + !strconcat(mnemonic, "64 PSEUDO!"), []>; + } +} + +multiclass PSEUDO_ATOMIC_LOAD_BINOP_PATS<string name, string frag> { + def : Pat<(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val), + (!cast<Instruction>(name # "8") addr:$ptr, GR8:$val)>; + def : Pat<(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val), + (!cast<Instruction>(name # "16") addr:$ptr, GR16:$val)>; + def : Pat<(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val), + 
(!cast<Instruction>(name # "32") addr:$ptr, GR32:$val)>; + def : Pat<(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val), + (!cast<Instruction>(name # "64") addr:$ptr, GR64:$val)>; } -let Constraints = "$val1 = $dst1, $val2 = $dst2", - Defs = [EFLAGS, EAX, EBX, ECX, EDX], - Uses = [EAX, EBX, ECX, EDX], - mayLoad = 1, mayStore = 1, - usesCustomInserter = 1 in { -def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMAND6432 PSEUDO!", []>; -def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMOR6432 PSEUDO!", []>; -def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMXOR6432 PSEUDO!", []>; -def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMNAND6432 PSEUDO!", []>; -def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMADD6432 PSEUDO!", []>; -def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMSUB6432 PSEUDO!", []>; -def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMSWAP6432 PSEUDO!", []>; +// Atomic exchange, and, or, xor +defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMAND">; +defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMOR">; +defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMXOR">; +defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMNAND">; +defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMAX">; +defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMIN">; +defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMAX">; +defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMIN">; + +defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMAND", "atomic_load_and">; +defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMOR", "atomic_load_or">; +defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMXOR", "atomic_load_xor">; +defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMNAND", "atomic_load_nand">; +defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMAX", "atomic_load_max">; +defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMIN", "atomic_load_min">; +defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">; +defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">; + +multiclass PSEUDO_ATOMIC_LOAD_BINOP6432<string mnemonic> { + let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in + def #NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + !strconcat(mnemonic, "6432 PSEUDO!"), []>; } +defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMAND">; +defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMOR">; +defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMXOR">; +defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMNAND">; +defm ATOMADD : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMADD">; +defm ATOMSUB : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSUB">; +defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMAX">; +defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMIN">; +defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMAX">; +defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMIN">; +defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">; + //===----------------------------------------------------------------------===// // Normal-Instructions-With-Lock-Prefix Pseudo Instructions //===----------------------------------------------------------------------===// @@ -624,7 +588,6 @@ def ATOMSWAP6432 : I<0, 
Pseudo, (outs GR32:$dst1, GR32:$dst2), // TODO: Get this to fold the constant into the instruction. let isCodeGenOnly = 1, Defs = [EFLAGS] in def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), - "lock\n\t" "or{l}\t{$zero, $dst|$dst, $zero}", [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK; @@ -644,72 +607,72 @@ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 }, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), - !strconcat("lock\n\t", mnemonic, "{b}\t", + !strconcat(mnemonic, "{b}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, LOCK; def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - !strconcat("lock\n\t", mnemonic, "{w}\t", + !strconcat(mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, OpSize, LOCK; def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - !strconcat("lock\n\t", mnemonic, "{l}\t", + !strconcat(mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, LOCK; def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - !strconcat("lock\n\t", mnemonic, "{q}\t", + !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, LOCK; def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 }, ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2), - !strconcat("lock\n\t", mnemonic, "{b}\t", + !strconcat(mnemonic, "{b}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2), - !strconcat("lock\n\t", mnemonic, "{w}\t", + !strconcat(mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, LOCK; + [], IIC_ALU_MEM>, OpSize, LOCK; def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2), - !strconcat("lock\n\t", mnemonic, "{l}\t", + !strconcat(mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2), - !strconcat("lock\n\t", mnemonic, "{q}\t", + !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2), - !strconcat("lock\n\t", mnemonic, "{w}\t", + !strconcat(mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, LOCK; + [], IIC_ALU_MEM>, OpSize, LOCK; def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2), - !strconcat("lock\n\t", mnemonic, "{l}\t", + !strconcat(mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2), - 
!strconcat("lock\n\t", mnemonic, "{q}\t", + !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; @@ -724,107 +687,117 @@ defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">; defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">; // Optimized codegen when the non-memory output is not used. +multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form, + string mnemonic> { let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { -def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), - "lock\n\t" - "inc{b}\t$dst", [], IIC_UNARY_MEM>, LOCK; -def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), - "lock\n\t" - "inc{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK; -def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), - "lock\n\t" - "inc{l}\t$dst", [], IIC_UNARY_MEM>, LOCK; -def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), - "lock\n\t" - "inc{q}\t$dst", [], IIC_UNARY_MEM>, LOCK; - -def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), - "lock\n\t" - "dec{b}\t$dst", [], IIC_UNARY_MEM>, LOCK; -def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), - "lock\n\t" - "dec{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK; -def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), - "lock\n\t" - "dec{l}\t$dst", [], IIC_UNARY_MEM>, LOCK; -def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), - "lock\n\t" - "dec{q}\t$dst", [], IIC_UNARY_MEM>, LOCK; +def #NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst), + !strconcat(mnemonic, "{b}\t$dst"), + [], IIC_UNARY_MEM>, LOCK; +def #NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst), + !strconcat(mnemonic, "{w}\t$dst"), + [], IIC_UNARY_MEM>, OpSize, LOCK; +def #NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst), + !strconcat(mnemonic, "{l}\t$dst"), + [], IIC_UNARY_MEM>, LOCK; +def #NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst), + !strconcat(mnemonic, "{q}\t$dst"), + [], IIC_UNARY_MEM>, LOCK; +} } -// Atomic compare and swap. -let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX], - isCodeGenOnly = 1 in -def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr), - "lock\n\t" - "cmpxchg8b\t$ptr", - [(X86cas8 addr:$ptr)], IIC_CMPX_LOCK_8B>, TB, LOCK; +defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "inc">; +defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "dec">; -let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX], - isCodeGenOnly = 1 in -def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr), - "lock\n\t" - "cmpxchg16b\t$ptr", - [(X86cas16 addr:$ptr)], IIC_CMPX_LOCK_16B>, TB, LOCK, - Requires<[HasCmpxchg16b]>; - -let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in { -def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap), - "lock\n\t" - "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR8:$swap, 1)], IIC_CMPX_LOCK_8>, TB, LOCK; +// Atomic compare and swap. 
+multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic, + SDPatternOperator frag, X86MemOperand x86memop, + InstrItinClass itin> { +let isCodeGenOnly = 1 in { + def #NAME# : I<Opc, Form, (outs), (ins x86memop:$ptr), + !strconcat(mnemonic, "\t$ptr"), + [(frag addr:$ptr)], itin>, TB, LOCK; +} } -let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in { -def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap), - "lock\n\t" - "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR16:$swap, 2)], IIC_CMPX_LOCK>, TB, OpSize, LOCK; +multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form, + string mnemonic, SDPatternOperator frag, + InstrItinClass itin8, InstrItinClass itin> { +let isCodeGenOnly = 1 in { + let Defs = [AL, EFLAGS], Uses = [AL] in + def #NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap), + !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"), + [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK; + let Defs = [AX, EFLAGS], Uses = [AX] in + def #NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap), + !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"), + [(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize, LOCK; + let Defs = [EAX, EFLAGS], Uses = [EAX] in + def #NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap), + !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"), + [(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, LOCK; + let Defs = [RAX, EFLAGS], Uses = [RAX] in + def #NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap), + !strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"), + [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK; +} } -let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in { -def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap), - "lock\n\t" - "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR32:$swap, 4)], IIC_CMPX_LOCK>, TB, LOCK; +let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in { +defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", + X86cas8, i64mem, + IIC_CMPX_LOCK_8B>; } -let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in { -def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap), - "lock\n\t" - "cmpxchg{q}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR64:$swap, 8)], IIC_CMPX_LOCK>, TB, LOCK; +let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX], + Predicates = [HasCmpxchg16b] in { +defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b", + X86cas16, i128mem, + IIC_CMPX_LOCK_16B>, REX_W; } +defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg", + X86cas, IIC_CMPX_LOCK_8, IIC_CMPX_LOCK>; + // Atomic exchange and add -let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in { -def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), - "lock\n\t" - "xadd{b}\t{$val, $ptr|$ptr, $val}", - [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))], - IIC_XADD_LOCK_MEM8>, - TB, LOCK; -def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr), - "lock\n\t" - "xadd{w}\t{$val, $ptr|$ptr, $val}", - [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))], - IIC_XADD_LOCK_MEM>, - TB, OpSize, LOCK; -def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr), - "lock\n\t" - "xadd{l}\t{$val, $ptr|$ptr, $val}", - [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))], - IIC_XADD_LOCK_MEM>, - TB, LOCK; -def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins 
GR64:$val,i64mem:$ptr), - "lock\n\t" - "xadd{q}\t{$val, $ptr|$ptr, $val}", - [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))], - IIC_XADD_LOCK_MEM>, - TB, LOCK; +multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic, + string frag, + InstrItinClass itin8, InstrItinClass itin> { + let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in { + def #NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst), + (ins GR8:$val, i8mem:$ptr), + !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"), + [(set GR8:$dst, + (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))], + itin8>; + def #NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst), + (ins GR16:$val, i16mem:$ptr), + !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"), + [(set + GR16:$dst, + (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))], + itin>, OpSize; + def #NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst), + (ins GR32:$val, i32mem:$ptr), + !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"), + [(set + GR32:$dst, + (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))], + itin>; + def #NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst), + (ins GR64:$val, i64mem:$ptr), + !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"), + [(set + GR64:$dst, + (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))], + itin>; + } } +defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add", + IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>, + TB, LOCK; + def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src), "#ACQUIRE_MOV PSEUDO!", [(set GR8:$dst, (atomic_load_8 addr:$src))]>; @@ -1024,7 +997,24 @@ def : Pat<(X86call (i64 tglobaladdr:$dst)), def : Pat<(X86call (i64 texternalsym:$dst)), (CALL64pcrel32 texternalsym:$dst)>; -// tailcall stuff +// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they +// can never use callee-saved registers. That is the purpose of the GR64_TC +// register classes. +// +// The only volatile register that is never used by the calling convention is +// %r11. This happens when calling a vararg function with 6 arguments. +// +// Match an X86tcret that uses less than 7 volatile registers. +def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off), + (X86tcret node:$ptr, node:$off), [{ + // X86tcret args: (*chain, ptr, imm, regs..., glue) + unsigned NumRegs = 0; + for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i) + if (isa<RegisterSDNode>(N->getOperand(i)) && ++NumRegs > 6) + return false; + return true; +}]>; + def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>, Requires<[In32BitMode]>; @@ -1048,7 +1038,9 @@ def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>, Requires<[In64BitMode]>; -def : Pat<(X86tcret (load addr:$dst), imm:$off), +// Don't fold loads into X86tcret requiring more than 6 regs. +// There wouldn't be enough scratch registers for base+index. +def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off), (TCRETURNmi64 addr:$dst, imm:$off)>, Requires<[In64BitMode]>; diff --git a/contrib/llvm/lib/Target/X86/X86InstrControl.td b/contrib/llvm/lib/Target/X86/X86InstrControl.td index b0c27c8..bfe9541 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrControl.td +++ b/contrib/llvm/lib/Target/X86/X86InstrControl.td @@ -16,15 +16,18 @@ // // Return instructions. +// +// The X86retflag return instructions are variadic because we may add ST0 and +// ST1 arguments when returning values on the x87 stack. 
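// ---- Standalone restatement (editorial aside, not part of the patch) of
// the X86tcret_6regs predicate above: walk the X86tcret operands past
// (chain, ptr, imm) and reject the match once a seventh register operand is
// seen, so folding a load's base+index registers cannot run the tail call
// out of scratch registers. Names here are illustrative.
#include <cassert>
#include <vector>

enum OpKind { Chain, Ptr, Imm, Reg, Glue };

static bool usesAtMost6Regs(const std::vector<OpKind> &Ops) {
  unsigned NumRegs = 0;
  for (unsigned I = 3; I < Ops.size(); ++I) // skip chain, ptr, imm
    if (Ops[I] == Reg && ++NumRegs > 6)
      return false;
  return true;
}

int main() {
  std::vector<OpKind> Ok = { Chain, Ptr, Imm, Reg, Reg, Reg, Glue };
  std::vector<OpKind> TooMany = { Chain, Ptr, Imm,
                                  Reg, Reg, Reg, Reg, Reg, Reg, Reg, Glue };
  assert(usesAtMost6Regs(Ok) && !usesAtMost6Regs(TooMany));
  return 0;
}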
let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, FPForm = SpecialFP in { - def RET : I <0xC3, RawFrm, (outs), (ins), + def RET : I <0xC3, RawFrm, (outs), (ins variable_ops), "ret", [(X86retflag 0)], IIC_RET>; def RETW : I <0xC3, RawFrm, (outs), (ins), "ret{w}", [], IIC_RET>, OpSize; - def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt), + def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), "ret\t$amt", [(X86retflag timm:$amt)], IIC_RET_IMM>; def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt), diff --git a/contrib/llvm/lib/Target/X86/X86InstrFMA.td b/contrib/llvm/lib/Target/X86/X86InstrFMA.td index 265b4bb..959d91a 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrFMA.td +++ b/contrib/llvm/lib/Target/X86/X86InstrFMA.td @@ -16,243 +16,180 @@ //===----------------------------------------------------------------------===// let Constraints = "$src1 = $dst" in { -multiclass fma3p_rm<bits<8> opc, string OpcodeStr> { -let neverHasSideEffects = 1 in { - def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; - let mayLoad = 1 in - def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, f128mem:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; - def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, VR256:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; - let mayLoad = 1 in - def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, f256mem:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; -} // neverHasSideEffects = 1 -} - -// Intrinsic for 213 pattern -multiclass fma3p_rm_int<bits<8> opc, string OpcodeStr, - PatFrag MemFrag128, PatFrag MemFrag256, - Intrinsic Int128, Intrinsic Int256, SDNode Op213, - ValueType OpVT128, ValueType OpVT256> { - def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, (Int128 VR128:$src2, VR128:$src1, - VR128:$src3))]>; - +multiclass fma3p_rm<bits<8> opc, string OpcodeStr, + PatFrag MemFrag128, PatFrag MemFrag256, + ValueType OpVT128, ValueType OpVT256, + SDPatternOperator Op = null_frag> { + let isCommutable = 1 in def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, (OpVT128 (Op213 VR128:$src2, + [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1, VR128:$src3)))]>; - def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, f128mem:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, (Int128 VR128:$src2, VR128:$src1, - (MemFrag128 addr:$src3)))]>; - + let mayLoad = 1 in def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, f128mem:$src3), - !strconcat(OpcodeStr, + !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, (OpVT128 (Op213 VR128:$src2, VR128:$src1, + [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1, (MemFrag128 addr:$src3))))]>; - - def rY_Int : FMA3<opc, MRMSrcReg, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, VR256:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set 
VR256:$dst, (Int256 VR256:$src2, VR256:$src1, - VR256:$src3))]>; - + let isCommutable = 1 in def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, VR256:$src3), - !strconcat(OpcodeStr, + !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR256:$dst, (OpVT256 (Op213 VR256:$src2, VR256:$src1, - VR256:$src3)))]>; - - def mY_Int : FMA3<opc, MRMSrcMem, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, f256mem:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR256:$dst, (Int256 VR256:$src2, VR256:$src1, - (MemFrag256 addr:$src3)))]>; + [(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1, + VR256:$src3)))]>, VEX_L; + let mayLoad = 1 in def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, f256mem:$src3), - !strconcat(OpcodeStr, + !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR256:$dst, - (OpVT256 (Op213 VR256:$src2, VR256:$src1, - (MemFrag256 addr:$src3))))]>; + (OpVT256 (Op VR256:$src2, VR256:$src1, + (MemFrag256 addr:$src3))))]>, VEX_L; } } // Constraints = "$src1 = $dst" multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, string OpcodeStr, string PackTy, PatFrag MemFrag128, PatFrag MemFrag256, - Intrinsic Int128, Intrinsic Int256, SDNode Op, - ValueType OpTy128, ValueType OpTy256> { - defm r213 : fma3p_rm_int <opc213, !strconcat(OpcodeStr, - !strconcat("213", PackTy)), MemFrag128, MemFrag256, - Int128, Int256, Op, OpTy128, OpTy256>; - defm r132 : fma3p_rm <opc132, - !strconcat(OpcodeStr, !strconcat("132", PackTy))>; - defm r231 : fma3p_rm <opc231, - !strconcat(OpcodeStr, !strconcat("231", PackTy))>; + SDNode Op, ValueType OpTy128, ValueType OpTy256> { + defm r213 : fma3p_rm<opc213, + !strconcat(OpcodeStr, !strconcat("213", PackTy)), + MemFrag128, MemFrag256, OpTy128, OpTy256, Op>; +let neverHasSideEffects = 1 in { + defm r132 : fma3p_rm<opc132, + !strconcat(OpcodeStr, !strconcat("132", PackTy)), + MemFrag128, MemFrag256, OpTy128, OpTy256>; + defm r231 : fma3p_rm<opc231, + !strconcat(OpcodeStr, !strconcat("231", PackTy)), + MemFrag128, MemFrag256, OpTy128, OpTy256>; +} // neverHasSideEffects = 1 } // Fused Multiply-Add let ExeDomain = SSEPackedSingle in { defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", memopv4f32, - memopv8f32, int_x86_fma_vfmadd_ps, - int_x86_fma_vfmadd_ps_256, X86Fmadd, - v4f32, v8f32>; + memopv8f32, X86Fmadd, v4f32, v8f32>; defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32, - memopv8f32, int_x86_fma_vfmsub_ps, - int_x86_fma_vfmsub_ps_256, X86Fmsub, - v4f32, v8f32>; + memopv8f32, X86Fmsub, v4f32, v8f32>; defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps", - memopv4f32, memopv8f32, - int_x86_fma_vfmaddsub_ps, - int_x86_fma_vfmaddsub_ps_256, X86Fmaddsub, + memopv4f32, memopv8f32, X86Fmaddsub, v4f32, v8f32>; defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps", - memopv4f32, memopv8f32, - int_x86_fma_vfmsubadd_ps, - int_x86_fma_vfmaddsub_ps_256, X86Fmsubadd, + memopv4f32, memopv8f32, X86Fmsubadd, v4f32, v8f32>; } let ExeDomain = SSEPackedDouble in { defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfmadd_pd, - int_x86_fma_vfmadd_pd_256, X86Fmadd, v2f64, - v4f64>, VEX_W; + memopv4f64, X86Fmadd, v2f64, v4f64>, VEX_W; defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfmsub_pd, - int_x86_fma_vfmsub_pd_256, X86Fmsub, v2f64, - v4f64>, VEX_W; + 
memopv4f64, X86Fmsub, v2f64, v4f64>, VEX_W; defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", - memopv2f64, memopv4f64, - int_x86_fma_vfmaddsub_pd, - int_x86_fma_vfmaddsub_pd_256, X86Fmaddsub, + memopv2f64, memopv4f64, X86Fmaddsub, v2f64, v4f64>, VEX_W; defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", - memopv2f64, memopv4f64, - int_x86_fma_vfmsubadd_pd, - int_x86_fma_vfmsubadd_pd_256, X86Fmsubadd, + memopv2f64, memopv4f64, X86Fmsubadd, v2f64, v4f64>, VEX_W; } // Fused Negative Multiply-Add let ExeDomain = SSEPackedSingle in { defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", memopv4f32, - memopv8f32, int_x86_fma_vfnmadd_ps, - int_x86_fma_vfnmadd_ps_256, X86Fnmadd, v4f32, - v8f32>; + memopv8f32, X86Fnmadd, v4f32, v8f32>; defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", memopv4f32, - memopv8f32, int_x86_fma_vfnmsub_ps, - int_x86_fma_vfnmsub_ps_256, X86Fnmsub, v4f32, - v8f32>; + memopv8f32, X86Fnmsub, v4f32, v8f32>; } let ExeDomain = SSEPackedDouble in { defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfnmadd_pd, - int_x86_fma_vfnmadd_pd_256, X86Fnmadd, v2f64, - v4f64>, VEX_W; + memopv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W; defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", - memopv2f64, - memopv4f64, int_x86_fma_vfnmsub_pd, - int_x86_fma_vfnmsub_pd_256, X86Fnmsub, v2f64, + memopv2f64, memopv4f64, X86Fnmsub, v2f64, v4f64>, VEX_W; } let Constraints = "$src1 = $dst" in { multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop, - RegisterClass RC> { -let neverHasSideEffects = 1 in { - def r : FMA3<opc, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2, RC:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; + RegisterClass RC, ValueType OpVT, PatFrag mem_frag, + SDPatternOperator OpNode = null_frag> { + let isCommutable = 1 in + def r : FMA3<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, RC:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set RC:$dst, + (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>; let mayLoad = 1 in - def m : FMA3<opc, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, RC:$src2, x86memop:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; -} // neverHasSideEffects = 1 + def m : FMA3<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, RC:$src2, x86memop:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set RC:$dst, + (OpVT (OpNode RC:$src2, RC:$src1, + (mem_frag addr:$src3))))]>; } multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr, Operand memop, - ComplexPattern mem_cpat, Intrinsic IntId, - RegisterClass RC, SDNode OpNode, ValueType OpVT> { + ComplexPattern mem_cpat, Intrinsic IntId, + RegisterClass RC> { + let isCommutable = 1 in def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, (IntId VR128:$src2, VR128:$src1, + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR128:$dst, (IntId VR128:$src2, VR128:$src1, VR128:$src3))]>; def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, memop:$src3), - !strconcat(OpcodeStr, + !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR128:$dst, (IntId VR128:$src2, VR128:$src1, mem_cpat:$src3))]>; - def r : FMA3<opc, MRMSrcReg, (outs 
RC:$dst), - (ins RC:$src1, RC:$src2, RC:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set RC:$dst, - (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>; - let mayLoad = 1 in - def m : FMA3<opc, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, RC:$src2, memop:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; } } // Constraints = "$src1 = $dst" multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, - string OpStr, Intrinsic IntF32, Intrinsic IntF64, - SDNode OpNode> { - defm SSr132 : fma3s_rm<opc132, !strconcat(OpStr, "132ss"), f32mem, FR32>; - defm SSr231 : fma3s_rm<opc231, !strconcat(OpStr, "231ss"), f32mem, FR32>; - defm SDr132 : fma3s_rm<opc132, !strconcat(OpStr, "132sd"), f64mem, FR64>, - VEX_W; - defm SDr231 : fma3s_rm<opc231, !strconcat(OpStr, "231sd"), f64mem, FR64>, - VEX_W; - defm SSr213 : fma3s_rm_int <opc213, !strconcat(OpStr, "213ss"), ssmem, - sse_load_f32, IntF32, FR32, OpNode, f32>; - defm SDr213 : fma3s_rm_int <opc213, !strconcat(OpStr, "213sd"), sdmem, - sse_load_f64, IntF64, FR64, OpNode, f64>, VEX_W; + string OpStr, string PackTy, Intrinsic Int, + SDNode OpNode, RegisterClass RC, ValueType OpVT, + X86MemOperand x86memop, Operand memop, PatFrag mem_frag, + ComplexPattern mem_cpat> { +let neverHasSideEffects = 1 in { + defm r132 : fma3s_rm<opc132, !strconcat(OpStr, !strconcat("132", PackTy)), + x86memop, RC, OpVT, mem_frag>; + defm r231 : fma3s_rm<opc231, !strconcat(OpStr, !strconcat("231", PackTy)), + x86memop, RC, OpVT, mem_frag>; +} + +defm r213 : fma3s_rm<opc213, !strconcat(OpStr, !strconcat("213", PackTy)), + x86memop, RC, OpVT, mem_frag, OpNode>, + fma3s_rm_int<opc213, !strconcat(OpStr, !strconcat("213", PackTy)), + memop, mem_cpat, Int, RC>; +} + +multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231, + string OpStr, Intrinsic IntF32, Intrinsic IntF64, + SDNode OpNode> { + defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", IntF32, OpNode, + FR32, f32, f32mem, ssmem, loadf32, sse_load_f32>; + defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", IntF64, OpNode, + FR64, f64, f64mem, sdmem, loadf64, sse_load_f64>, VEX_W; } -defm VFMADD : fma3s_forms<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss, - int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG; -defm VFMSUB : fma3s_forms<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss, - int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG; +defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss, + int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG; +defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss, + int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG; -defm VFNMADD : fma3s_forms<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss, - int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG; -defm VFNMSUB : fma3s_forms<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss, - int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG; +defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss, + int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG; +defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss, + int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG; //===----------------------------------------------------------------------===// @@ -260,73 +197,102 @@ defm VFNMSUB : fma3s_forms<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss, //===----------------------------------------------------------------------===// -multiclass fma4s<bits<8> opc, string OpcodeStr, Operand memop, - ComplexPattern mem_cpat, Intrinsic Int> { - def rr : 
FMA4<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), +multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC, + X86MemOperand x86memop, ValueType OpVT, SDNode OpNode, + PatFrag mem_frag> { + let isCommutable = 1 in + def rr : FMA4<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR128:$dst, - (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4; - def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, memop:$src3), + [(set RC:$dst, + (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, MemOp4; + def rm : FMA4<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, RC:$src2, x86memop:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR128:$dst, - (Int VR128:$src1, VR128:$src2, mem_cpat:$src3))]>, VEX_W, MemOp4; - def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, memop:$src2, VR128:$src3), + [(set RC:$dst, (OpNode RC:$src1, RC:$src2, + (mem_frag addr:$src3)))]>, VEX_W, MemOp4; + def mr : FMA4<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR128:$dst, - (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>; + [(set RC:$dst, + (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>; // For disassembler let isCodeGenOnly = 1 in - def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), + def rr_REV : FMA4<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>; } -multiclass fma4p<bits<8> opc, string OpcodeStr, - Intrinsic Int128, Intrinsic Int256, +multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop, + ComplexPattern mem_cpat, Intrinsic Int> { + let isCommutable = 1 in + def rr_Int : FMA4<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set VR128:$dst, + (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4; + def rm_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, memop:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, + mem_cpat:$src3))]>, VEX_W, MemOp4; + def mr_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, memop:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set VR128:$dst, + (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>; +} + +multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT128, ValueType OpVT256, PatFrag ld_frag128, PatFrag ld_frag256> { + let isCommutable = 1 in def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (Int128 VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4; + (OpVT128 (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>, + VEX_W, MemOp4; def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, f128mem:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR128:$dst, (Int128 
VR128:$src1, VR128:$src2, + [(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2, (ld_frag128 addr:$src3)))]>, VEX_W, MemOp4; def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (Int128 VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>; + (OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>; + let isCommutable = 1 in def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, VR256:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR256:$dst, - (Int256 VR256:$src1, VR256:$src2, VR256:$src3))]>, VEX_W, MemOp4; + (OpVT256 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>, + VEX_W, MemOp4, VEX_L; def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, f256mem:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2, - (ld_frag256 addr:$src3)))]>, VEX_W, MemOp4; + [(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2, + (ld_frag256 addr:$src3)))]>, VEX_W, MemOp4, VEX_L; def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, VR256:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR256:$dst, - (Int256 VR256:$src1, (ld_frag256 addr:$src2), VR256:$src3))]>; + [(set VR256:$dst, (OpNode VR256:$src1, + (ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L; // For disassembler let isCodeGenOnly = 1 in { def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst), @@ -336,51 +302,65 @@ let isCodeGenOnly = 1 in { def rrY_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, VR256:$src3), !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>; + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, + VEX_L; } // isCodeGenOnly = 1 } let Predicates = [HasFMA4] in { -defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", ssmem, sse_load_f32, - int_x86_fma_vfmadd_ss>; -defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfmadd_sd>; -defm VFMADDPS4 : fma4p<0x68, "vfmaddps", int_x86_fma_vfmadd_ps, - int_x86_fma_vfmadd_ps_256, memopv4f32, memopv8f32>; -defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", int_x86_fma_vfmadd_pd, - int_x86_fma_vfmadd_pd_256, memopv2f64, memopv4f64>; -defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", ssmem, sse_load_f32, - int_x86_fma_vfmsub_ss>; -defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfmsub_sd>; -defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", int_x86_fma_vfmsub_ps, - int_x86_fma_vfmsub_ps_256, memopv4f32, memopv8f32>; -defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", int_x86_fma_vfmsub_pd, - int_x86_fma_vfmsub_pd_256, memopv2f64, memopv4f64>; -defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", ssmem, sse_load_f32, - int_x86_fma_vfnmadd_ss>; -defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfnmadd_sd>; -defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", int_x86_fma_vfnmadd_ps, - int_x86_fma_vfnmadd_ps_256, memopv4f32, memopv8f32>; -defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", int_x86_fma_vfnmadd_pd, - int_x86_fma_vfnmadd_pd_256, memopv2f64, memopv4f64>; -defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", ssmem, sse_load_f32, - int_x86_fma_vfnmsub_ss>; -defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfnmsub_sd>; -defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", int_x86_fma_vfnmsub_ps, - 
int_x86_fma_vfnmsub_ps_256, memopv4f32, memopv8f32>; -defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", int_x86_fma_vfnmsub_pd, - int_x86_fma_vfnmsub_pd_256, memopv2f64, memopv4f64>; -defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", int_x86_fma_vfmaddsub_ps, - int_x86_fma_vfmaddsub_ps_256, memopv4f32, memopv8f32>; -defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", int_x86_fma_vfmaddsub_pd, - int_x86_fma_vfmaddsub_pd_256, memopv2f64, memopv4f64>; -defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", int_x86_fma_vfmsubadd_ps, - int_x86_fma_vfmsubadd_ps_256, memopv4f32, memopv8f32>; -defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", int_x86_fma_vfmsubadd_pd, - int_x86_fma_vfmsubadd_pd_256, memopv2f64, memopv4f64>; +defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>, + fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32, + int_x86_fma_vfmadd_ss>; +defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>, + fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64, + int_x86_fma_vfmadd_sd>; +defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>, + fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32, + int_x86_fma_vfmsub_ss>; +defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>, + fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64, + int_x86_fma_vfmsub_sd>; +defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32, + X86Fnmadd, loadf32>, + fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32, + int_x86_fma_vfnmadd_ss>; +defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64, + X86Fnmadd, loadf64>, + fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64, + int_x86_fma_vfnmadd_sd>; +defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32, + X86Fnmsub, loadf32>, + fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32, + int_x86_fma_vfnmsub_ss>; +defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64, + X86Fnmsub, loadf64>, + fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64, + int_x86_fma_vfnmsub_sd>; + +defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32, + memopv4f32, memopv8f32>; +defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64, + memopv2f64, memopv4f64>; +defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32, + memopv4f32, memopv8f32>; +defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64, + memopv2f64, memopv4f64>; +defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32, + memopv4f32, memopv8f32>; +defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64, + memopv2f64, memopv4f64>; +defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32, + memopv4f32, memopv8f32>; +defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64, + memopv2f64, memopv4f64>; +defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32, + memopv4f32, memopv8f32>; +defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64, + memopv2f64, memopv4f64>; +defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32, + memopv4f32, memopv8f32>; +defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64, + memopv2f64, memopv4f64>; } // HasFMA4 diff --git a/contrib/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm/lib/Target/X86/X86InstrFormats.td index 81b4f81..268e9fc 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrFormats.td +++ b/contrib/llvm/lib/Target/X86/X86InstrFormats.td @@ -44,14 +44,15 @@ def RawFrmImm16 : Format<44>; def MRM_D0 : Format<45>; def MRM_D1 : Format<46>; def MRM_D4 : Format<47>; -def MRM_D8 : 
Format<48>; -def MRM_D9 : Format<49>; -def MRM_DA : Format<50>; -def MRM_DB : Format<51>; -def MRM_DC : Format<52>; -def MRM_DD : Format<53>; -def MRM_DE : Format<54>; -def MRM_DF : Format<55>; +def MRM_D5 : Format<48>; +def MRM_D8 : Format<49>; +def MRM_D9 : Format<50>; +def MRM_DA : Format<51>; +def MRM_DB : Format<52>; +def MRM_DC : Format<53>; +def MRM_DD : Format<54>; +def MRM_DE : Format<55>; +def MRM_DF : Format<56>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our @@ -287,12 +288,14 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm, let CodeSize = 3; } +def __xs : XS; + // SI - SSE 1 & 2 scalar instructions class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin> { let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], - !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2])); + !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2])); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); @@ -303,7 +306,7 @@ class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin> { let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], - !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2])); + !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2])); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); @@ -314,18 +317,25 @@ class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin, Domain d> : I<o, F, outs, ins, asm, pattern, itin, d> { let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], - !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1])); + !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1])); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); } +// MMXPI - SSE 1 & 2 packed instructions with MMX operands +class MMXPI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, + InstrItinClass itin, Domain d> + : I<o, F, outs, ins, asm, pattern, itin, d> { + let Predicates = !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]); +} + // PIi8 - SSE 1 & 2 packed instructions with immediate class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin, Domain d> : Ii8<o, F, outs, ins, asm, pattern, itin, d> { let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX], - !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1])); + !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1])); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm); @@ -341,18 +351,18 @@ class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm, class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> - : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>; + : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>; class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> - : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>; + : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>; class 
PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB, - Requires<[HasSSE1]>; + Requires<[UseSSE1]>; class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB, - Requires<[HasSSE1]>; + Requires<[UseSSE1]>; class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS, @@ -372,27 +382,31 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes. // VSDI - SSE2 instructions with XD prefix in AVX form. // VPDI - SSE2 instructions with TB and OpSize prefixes in AVX form. +// MMXSDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix as well as +// MMX operands. +// MMXSSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix as well as +// MMX operands. class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> - : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>; + : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>; class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> - : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>; + : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>; class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> - : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE2]>; + : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE2]>; class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> - : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>; + : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>; class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, - Requires<[HasSSE2]>; + Requires<[UseSSE2]>; class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, - Requires<[HasSSE2]>; + Requires<[UseSSE2]>; class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD, @@ -405,6 +419,12 @@ class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB, OpSize, Requires<[HasAVX]>; +class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>; +class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>; // SSE3 Instruction Templates: // @@ -415,21 +435,23 @@ class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm, class S3SI<bits<8> o, Format F, 
dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS, - Requires<[HasSSE3]>; + Requires<[UseSSE3]>; class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD, - Requires<[HasSSE3]>; + Requires<[UseSSE3]>; class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, - Requires<[HasSSE3]>; + Requires<[UseSSE3]>; // SSSE3 Instruction Templates: // // SS38I - SSSE3 instructions with T8 prefix. // SS3AI - SSSE3 instructions with TA prefix. +// MMXSS38I - SSSE3 instructions with T8 prefix and MMX operands. +// MMXSS3AI - SSSE3 instructions with TA prefix and MMX operands. // // Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version // uses the MMX registers. The 64-bit versions are grouped with the MMX @@ -438,10 +460,18 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, - Requires<[HasSSSE3]>; + Requires<[UseSSSE3]>; class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, + Requires<[UseSSSE3]>; +class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, + Requires<[HasSSSE3]>; +class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[HasSSSE3]>; // SSE4.1 Instruction Templates: @@ -452,11 +482,11 @@ class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, - Requires<[HasSSE41]>; + Requires<[UseSSE41]>; class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, - Requires<[HasSSE41]>; + Requires<[UseSSE41]>; // SSE4.2 Instruction Templates: // @@ -464,9 +494,10 @@ class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, - Requires<[HasSSE42]>; + Requires<[UseSSE42]>; // SS42FI - SSE 4.2 instructions with T8XD prefix. +// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns. 
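The Has*-to-Use* switch running through these templates deserves a gloss. Judging from the CRC32 note above (CRC32 has no VEX-encoded form, so it alone keeps plain HasSSE42), the Use* predicates are presumably defined as "feature present and AVX not enabled", so the legacy SSE templates stop matching once the VEX forms are available. A minimal sketch of that split, with invented names:

    struct X86Features { bool SSE2; bool AVX; };

    // UseSSE2-style predicate: select the legacy encoding only when no
    // VEX-encoded AVX equivalent exists for this compilation.
    static bool useSSE2(const X86Features &F) { return F.SSE2 && !F.AVX; }

    // HasSSE2-style predicate: plain availability, kept for instructions
    // (CRC32, the MMX-operand templates) that have no AVX counterpart.
    static bool hasSSE2(const X86Features &F) { return F.SSE2; }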
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin>, T8XD, Requires<[HasSSE42]>; @@ -475,7 +506,7 @@ class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm, class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, - Requires<[HasSSE42]>; + Requires<[UseSSE42]>; // AVX Instruction Templates: // Instructions introduced in AVX (no SSE equivalent forms) diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 1db68c8..73ba001 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -29,6 +29,13 @@ def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; + +// Commutative and Associative FMIN and FMAX. +def X86fminc : SDNode<"X86ISD::FMINC", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; +def X86fmaxc : SDNode<"X86ISD::FMAXC", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; + def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp, @@ -73,18 +80,30 @@ def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>; def X86vzmovly : SDNode<"X86ISD::VZEXT_MOVL", - SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, + SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisOpSmallerThanOp<1, 0> ]>>; def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL", - SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>; + SDTypeProfile<1, 1, + [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>; def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def X86vzext : SDNode<"X86ISD::VZEXT", + SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisInt<0>, SDTCisInt<1>]>>; + +def X86vsext : SDNode<"X86ISD::VSEXT", + SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisInt<0>, SDTCisInt<1>]>>; + def X86vfpext : SDNode<"X86ISD::VFPEXT", SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisFP<0>, SDTCisFP<1>]>>; +def X86vfpround: SDNode<"X86ISD::VFPROUND", + SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisFP<0>, SDTCisFP<1>]>>; def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>; def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>; @@ -175,8 +194,8 @@ def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>; def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>; def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>; def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>; -def X86Fmaddsub : SDNode<"X86ISD::FMSUBADD", SDTFma>; -def X86Fmsubadd : SDNode<"X86ISD::FMADDSUB", SDTFma>; +def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>; +def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>; def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>, @@ -232,6 +251,10 @@ def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>; def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; +// 128-/256-bit extload pattern fragments +def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>; +def extloadv4f32 : PatFrag<(ops node:$ptr), 
(v4f64 (extloadvf32 node:$ptr))>; + // Like 'store', but always requires 128-bit vector alignment. def alignedstore : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{ diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp index cca04e5..5a99ff0 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -561,6 +561,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, TB_ALIGN_32 }, { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE }, { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE }, + + // BMI/BMI2 foldable instructions + { X86::RORX32ri, X86::RORX32mi, 0 }, + { X86::RORX64ri, X86::RORX64mi, 0 }, + { X86::SARX32rr, X86::SARX32rm, 0 }, + { X86::SARX64rr, X86::SARX64rm, 0 }, + { X86::SHRX32rr, X86::SHRX32rm, 0 }, + { X86::SHRX64rr, X86::SHRX64rm, 0 }, + { X86::SHLX32rr, X86::SHLX32rm, 0 }, + { X86::SHLX64rr, X86::SHLX64rm, 0 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { @@ -1110,6 +1120,44 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_32 }, { X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_32 }, // FIXME: add AVX 256-bit foldable instructions + + // FMA4 foldable patterns + { X86::VFMADDSS4rr, X86::VFMADDSS4mr, 0 }, + { X86::VFMADDSD4rr, X86::VFMADDSD4mr, 0 }, + { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_16 }, + { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_16 }, + { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, TB_ALIGN_32 }, + { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, TB_ALIGN_32 }, + { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, 0 }, + { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, 0 }, + { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_16 }, + { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_16 }, + { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, TB_ALIGN_32 }, + { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, TB_ALIGN_32 }, + { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, 0 }, + { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, 0 }, + { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_16 }, + { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_16 }, + { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, TB_ALIGN_32 }, + { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, TB_ALIGN_32 }, + { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, 0 }, + { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, 0 }, + { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_16 }, + { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_16 }, + { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, TB_ALIGN_32 }, + { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, TB_ALIGN_32 }, + { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_16 }, + { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_16 }, + { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, TB_ALIGN_32 }, + { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, TB_ALIGN_32 }, + { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_16 }, + { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_16 }, + { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_32 }, + { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 }, + + // BMI/BMI2 foldable instructions + { X86::MULX32rr, X86::MULX32rm, 0 }, + { X86::MULX64rr, X86::MULX64rm, 0 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { @@ -1145,10 +1193,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_32 }, { X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_32 }, { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_32 
}, - { X86::VFMADDPSr213r_Int, X86::VFMADDPSr213m_Int, TB_ALIGN_16 }, - { X86::VFMADDPDr213r_Int, X86::VFMADDPDr213m_Int, TB_ALIGN_16 }, - { X86::VFMADDPSr213rY_Int, X86::VFMADDPSr213mY_Int, TB_ALIGN_32 }, - { X86::VFMADDPDr213rY_Int, X86::VFMADDPDr213mY_Int, TB_ALIGN_32 }, { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, 0 }, { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, 0 }, @@ -1171,10 +1215,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_32 }, { X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_32 }, { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_32 }, - { X86::VFNMADDPSr213r_Int, X86::VFNMADDPSr213m_Int, TB_ALIGN_16 }, - { X86::VFNMADDPDr213r_Int, X86::VFNMADDPDr213m_Int, TB_ALIGN_16 }, - { X86::VFNMADDPSr213rY_Int, X86::VFNMADDPSr213mY_Int, TB_ALIGN_32 }, - { X86::VFNMADDPDr213rY_Int, X86::VFNMADDPDr213mY_Int, TB_ALIGN_32 }, { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, 0 }, { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, 0 }, @@ -1197,10 +1237,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_32 }, { X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_32 }, { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_32 }, - { X86::VFMSUBPSr213r_Int, X86::VFMSUBPSr213m_Int, TB_ALIGN_16 }, - { X86::VFMSUBPDr213r_Int, X86::VFMSUBPDr213m_Int, TB_ALIGN_16 }, - { X86::VFMSUBPSr213rY_Int, X86::VFMSUBPSr213mY_Int, TB_ALIGN_32 }, - { X86::VFMSUBPDr213rY_Int, X86::VFMSUBPDr213mY_Int, TB_ALIGN_32 }, { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, 0 }, { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, 0 }, @@ -1223,10 +1259,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_32 }, { X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_32 }, { X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_32 }, - { X86::VFNMSUBPSr213r_Int, X86::VFNMSUBPSr213m_Int, TB_ALIGN_16 }, - { X86::VFNMSUBPDr213r_Int, X86::VFNMSUBPDr213m_Int, TB_ALIGN_16 }, - { X86::VFNMSUBPSr213rY_Int, X86::VFNMSUBPSr213mY_Int, TB_ALIGN_32 }, - { X86::VFNMSUBPDr213rY_Int, X86::VFNMSUBPDr213mY_Int, TB_ALIGN_32 }, { X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_16 }, { X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_16 }, @@ -1240,10 +1272,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_32 }, { X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_32 }, { X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_32 }, - { X86::VFMADDSUBPSr213r_Int, X86::VFMADDSUBPSr213m_Int, TB_ALIGN_16 }, - { X86::VFMADDSUBPDr213r_Int, X86::VFMADDSUBPDr213m_Int, TB_ALIGN_16 }, - { X86::VFMADDSUBPSr213rY_Int, X86::VFMADDSUBPSr213mY_Int, TB_ALIGN_32 }, - { X86::VFMADDSUBPDr213rY_Int, X86::VFMADDSUBPDr213mY_Int, TB_ALIGN_32 }, { X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_16 }, { X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_16 }, @@ -1257,10 +1285,40 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 }, { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 }, { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 }, - { X86::VFMSUBADDPSr213r_Int, X86::VFMSUBADDPSr213m_Int, TB_ALIGN_16 }, - { X86::VFMSUBADDPDr213r_Int, X86::VFMSUBADDPDr213m_Int, TB_ALIGN_16 }, - { X86::VFMSUBADDPSr213rY_Int, X86::VFMSUBADDPSr213mY_Int, TB_ALIGN_32 }, - { X86::VFMSUBADDPDr213rY_Int, X86::VFMSUBADDPDr213mY_Int, TB_ALIGN_32 }, + + // FMA4 
foldable patterns + { X86::VFMADDSS4rr, X86::VFMADDSS4rm, 0 }, + { X86::VFMADDSD4rr, X86::VFMADDSD4rm, 0 }, + { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_16 }, + { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_16 }, + { X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_32 }, + { X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_32 }, + { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, 0 }, + { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, 0 }, + { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_16 }, + { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_16 }, + { X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_32 }, + { X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_32 }, + { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, 0 }, + { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, 0 }, + { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_16 }, + { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_16 }, + { X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, TB_ALIGN_32 }, + { X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_32 }, + { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, 0 }, + { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, 0 }, + { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_16 }, + { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_16 }, + { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_32 }, + { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4rmY, TB_ALIGN_32 }, + { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_16 }, + { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_16 }, + { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4rmY, TB_ALIGN_32 }, + { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4rmY, TB_ALIGN_32 }, + { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_16 }, + { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 }, + { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 }, + { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) { @@ -1318,8 +1376,7 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); switch (MI.getOpcode()) { - default: - llvm_unreachable(0); + default: llvm_unreachable("Unreachable!"); case X86::MOVSX16rr8: case X86::MOVZX16rr8: case X86::MOVSX32rr8: @@ -1483,69 +1540,69 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, AliasAnalysis *AA) const { switch (MI->getOpcode()) { default: break; - case X86::MOV8rm: - case X86::MOV16rm: - case X86::MOV32rm: - case X86::MOV64rm: - case X86::LD_Fp64m: - case X86::MOVSSrm: - case X86::MOVSDrm: - case X86::MOVAPSrm: - case X86::MOVUPSrm: - case X86::MOVAPDrm: - case X86::MOVDQArm: - case X86::VMOVSSrm: - case X86::VMOVSDrm: - case X86::VMOVAPSrm: - case X86::VMOVUPSrm: - case X86::VMOVAPDrm: - case X86::VMOVDQArm: - case X86::VMOVAPSYrm: - case X86::VMOVUPSYrm: - case X86::VMOVAPDYrm: - case X86::VMOVDQAYrm: - case X86::MMX_MOVD64rm: - case X86::MMX_MOVQ64rm: - case X86::FsVMOVAPSrm: - case X86::FsVMOVAPDrm: - case X86::FsMOVAPSrm: - case X86::FsMOVAPDrm: { - // Loads from constant pools are trivially rematerializable. - if (MI->getOperand(1).isReg() && - MI->getOperand(2).isImm() && - MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && - MI->isInvariantLoad(AA)) { - unsigned BaseReg = MI->getOperand(1).getReg(); - if (BaseReg == 0 || BaseReg == X86::RIP) - return true; - // Allow re-materialization of PIC load. 
- if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal()) - return false; - const MachineFunction &MF = *MI->getParent()->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - return regIsPICBase(BaseReg, MRI); - } - return false; + case X86::MOV8rm: + case X86::MOV16rm: + case X86::MOV32rm: + case X86::MOV64rm: + case X86::LD_Fp64m: + case X86::MOVSSrm: + case X86::MOVSDrm: + case X86::MOVAPSrm: + case X86::MOVUPSrm: + case X86::MOVAPDrm: + case X86::MOVDQArm: + case X86::VMOVSSrm: + case X86::VMOVSDrm: + case X86::VMOVAPSrm: + case X86::VMOVUPSrm: + case X86::VMOVAPDrm: + case X86::VMOVDQArm: + case X86::VMOVAPSYrm: + case X86::VMOVUPSYrm: + case X86::VMOVAPDYrm: + case X86::VMOVDQAYrm: + case X86::MMX_MOVD64rm: + case X86::MMX_MOVQ64rm: + case X86::FsVMOVAPSrm: + case X86::FsVMOVAPDrm: + case X86::FsMOVAPSrm: + case X86::FsMOVAPDrm: { + // Loads from constant pools are trivially rematerializable. + if (MI->getOperand(1).isReg() && + MI->getOperand(2).isImm() && + MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && + MI->isInvariantLoad(AA)) { + unsigned BaseReg = MI->getOperand(1).getReg(); + if (BaseReg == 0 || BaseReg == X86::RIP) + return true; + // Allow re-materialization of PIC load. + if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal()) + return false; + const MachineFunction &MF = *MI->getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + return regIsPICBase(BaseReg, MRI); } + return false; + } - case X86::LEA32r: - case X86::LEA64r: { - if (MI->getOperand(2).isImm() && - MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && - !MI->getOperand(4).isReg()) { - // lea fi#, lea GV, etc. are all rematerializable. - if (!MI->getOperand(1).isReg()) - return true; - unsigned BaseReg = MI->getOperand(1).getReg(); - if (BaseReg == 0) - return true; - // Allow re-materialization of lea PICBase + x. - const MachineFunction &MF = *MI->getParent()->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - return regIsPICBase(BaseReg, MRI); - } - return false; - } + case X86::LEA32r: + case X86::LEA64r: { + if (MI->getOperand(2).isImm() && + MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && + !MI->getOperand(4).isReg()) { + // lea fi#, lea GV, etc. are all rematerializable. + if (!MI->getOperand(1).isReg()) + return true; + unsigned BaseReg = MI->getOperand(1).getReg(); + if (BaseReg == 0) + return true; + // Allow re-materialization of lea PICBase + x. 
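Taken together, the re-indented cases above spell out the whole rematerialization policy for loads and LEAs: the instruction may be re-executed at its point of use only when nothing it reads can have changed in between. Condensed into a sketch with invented parameter names:

    // A load (or LEA) is trivially rematerializable when:
    //   - it uses plain base+displacement addressing (no index register),
    //   - for loads, the memory is invariant (e.g. a constant pool entry),
    //   - the base is 0 (absolute), RIP, or a PIC base register, all of
    //     which are constant for the lifetime of the function.
    static bool isTriviallyRematable(bool IsInvariantLoad, bool HasIndexReg,
                                     bool BaseIsInvariant) {
      return IsInvariantLoad && !HasIndexReg && BaseIsInvariant;
    }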
+ const MachineFunction &MF = *MI->getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + return regIsPICBase(BaseReg, MRI); + } + return false; + } } // All other instructions marked M_REMATERIALIZABLE are always trivially @@ -1654,7 +1711,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, case X86::MOV64r0: { if (!isSafeToClobberEFLAGS(MBB, I)) { switch (Opc) { - default: break; + default: llvm_unreachable("Unreachable!"); case X86::MOV8r0: Opc = X86::MOV8ri; break; case X86::MOV16r0: Opc = X86::MOV16ri; break; case X86::MOV32r0: Opc = X86::MOV32ri; break; @@ -1727,8 +1784,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg); switch (MIOpc) { - default: - llvm_unreachable(0); + default: llvm_unreachable("Unreachable!"); case X86::SHL16ri: { unsigned ShAmt = MI->getOperand(2).getImm(); MIB.addReg(0).addImm(1 << ShAmt) @@ -1812,10 +1868,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineInstr *MI = MBBI; MachineFunction &MF = *MI->getParent()->getParent(); // All instructions input are two-addr instructions. Get the known operands. - unsigned Dest = MI->getOperand(0).getReg(); - unsigned Src = MI->getOperand(1).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isKill = MI->getOperand(1).isKill(); + const MachineOperand &Dest = MI->getOperand(0); + const MachineOperand &Src = MI->getOperand(1); MachineInstr *NewMI = NULL; // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When @@ -1833,11 +1887,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned B = MI->getOperand(1).getReg(); unsigned C = MI->getOperand(2).getReg(); if (B != C) return 0; - unsigned A = MI->getOperand(0).getReg(); unsigned M = MI->getOperand(3).getImm(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri)) - .addReg(A, RegState::Define | getDeadRegState(isDead)) - .addReg(B, getKillRegState(isKill)).addImm(M); + .addOperand(Dest).addOperand(Src).addImm(M); break; } case X86::SHUFPDrri: { @@ -1847,15 +1899,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned B = MI->getOperand(1).getReg(); unsigned C = MI->getOperand(2).getReg(); if (B != C) return 0; - unsigned A = MI->getOperand(0).getReg(); unsigned M = MI->getOperand(3).getImm(); // Convert to PSHUFD mask. M = ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6)| 0x44; NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri)) - .addReg(A, RegState::Define | getDeadRegState(isDead)) - .addReg(B, getKillRegState(isKill)).addImm(M); + .addOperand(Dest).addOperand(Src).addImm(M); break; } case X86::SHL64ri: { @@ -1866,15 +1916,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (ShAmt == 0 || ShAmt >= 4) return 0; // LEA can't handle RSP. 
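For reference, what the shift-to-LEA conversion here and the ADD/INC/DEC cases below buy: a two-address instruction ties its destination to a source register, while LEA writes a fresh register, so the allocator no longer needs a copy when the old value stays live. A C-level sketch of the values computed, with the resulting assembly in comments (function names are illustrative):

    // SHL32ri with shift amount 1..3 becomes LEA with scale 2/4/8, e.g.
    //   shll $2, %eax  ->  leal (,%eax,4), %ecx
    static unsigned leaScaled(unsigned Src, unsigned ShAmt) {
      return Src << ShAmt; // ShAmt must be < 4 to fit LEA's scale field
    }

    // ADD32ri / INC32r / DEC32r become LEA with a displacement, e.g.
    //   addl $8, %eax  ->  leal 8(%eax), %ecx
    static unsigned leaDisp(unsigned Src, int Disp) {
      return (unsigned)((int)Src + Disp);
    }

The *_NOSP register-class constraints in this code exist because, at least in the shift form, the source register lands in LEA's index slot, and RSP/ESP cannot be encoded as an index.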
- if (TargetRegisterInfo::isVirtualRegister(Src) && - !MF.getRegInfo().constrainRegClass(Src, &X86::GR64_NOSPRegClass)) + if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && + !MF.getRegInfo().constrainRegClass(Src.getReg(), + &X86::GR64_NOSPRegClass)) return 0; NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) - .addReg(Dest, RegState::Define | getDeadRegState(isDead)) - .addReg(0).addImm(1 << ShAmt) - .addReg(Src, getKillRegState(isKill)) - .addImm(0).addReg(0); + .addOperand(Dest) + .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0); break; } case X86::SHL32ri: { @@ -1885,15 +1934,15 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (ShAmt == 0 || ShAmt >= 4) return 0; // LEA can't handle ESP. - if (TargetRegisterInfo::isVirtualRegister(Src) && - !MF.getRegInfo().constrainRegClass(Src, &X86::GR32_NOSPRegClass)) + if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && + !MF.getRegInfo().constrainRegClass(Src.getReg(), + &X86::GR32_NOSPRegClass)) return 0; unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(Dest, RegState::Define | getDeadRegState(isDead)) - .addReg(0).addImm(1 << ShAmt) - .addReg(Src, getKillRegState(isKill)).addImm(0).addReg(0); + .addOperand(Dest) + .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0); break; } case X86::SHL16ri: { @@ -1906,10 +1955,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (DisableLEA16) return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) - .addReg(Dest, RegState::Define | getDeadRegState(isDead)) - .addReg(0).addImm(1 << ShAmt) - .addReg(Src, getKillRegState(isKill)) - .addImm(0).addReg(0); + .addOperand(Dest) + .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0); break; } default: { @@ -1932,14 +1979,12 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, (const TargetRegisterClass*)&X86::GR32_NOSPRegClass; // LEA can't handle RSP. - if (TargetRegisterInfo::isVirtualRegister(Src) && - !MF.getRegInfo().constrainRegClass(Src, RC)) + if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && + !MF.getRegInfo().constrainRegClass(Src.getReg(), RC)) return 0; - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, 1); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addOperand(Dest).addOperand(Src), 1); break; } case X86::INC16r: @@ -1947,10 +1992,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (DisableLEA16) return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, 1); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addOperand(Dest).addOperand(Src), 1); break; case X86::DEC64r: case X86::DEC32r: @@ -1962,14 +2005,12 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, (const TargetRegisterClass*)&X86::GR64_NOSPRegClass : (const TargetRegisterClass*)&X86::GR32_NOSPRegClass; // LEA can't handle RSP. 
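One mechanical change runs through the whole of convertToThreeAddress in this hunk: Dest and Src are now captured as MachineOperand references and forwarded with addOperand, so the kill/dead/undef flags travel with the operand instead of being rebuilt by hand from getKillRegState/getDeadRegState (the 16-bit add case below also gains the undef-preserving code that the 64/32-bit register add case already had). A minimal sketch of the forwarding pattern, using the real addOperand API but a hypothetical helper name:

    #include "llvm/CodeGen/MachineInstrBuilder.h"
    using namespace llvm;

    // Forwarding whole operands preserves isKill/isDead/isUndef for free;
    // addReg() would require each flag to be queried and re-applied.
    static const MachineInstrBuilder &
    addDestSrc(const MachineInstrBuilder &MIB, const MachineOperand &Dest,
               const MachineOperand &Src) {
      return MIB.addOperand(Dest).addOperand(Src);
    }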
- if (TargetRegisterInfo::isVirtualRegister(Src) && - !MF.getRegInfo().constrainRegClass(Src, RC)) + if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && + !MF.getRegInfo().constrainRegClass(Src.getReg(), RC)) return 0; - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, -1); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addOperand(Dest).addOperand(Src), -1); break; } case X86::DEC16r: @@ -1977,10 +2018,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (DisableLEA16) return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, -1); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addOperand(Dest).addOperand(Src), -1); break; case X86::ADD64rr: case X86::ADD64rr_DB: @@ -2007,9 +2046,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return 0; NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, Src2, isKill2); + .addOperand(Dest), + Src.getReg(), Src.isKill(), Src2, isKill2); // Preserve undefness of the operands. bool isUndef = MI->getOperand(1).isUndef(); @@ -2029,9 +2067,15 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned Src2 = MI->getOperand(2).getReg(); bool isKill2 = MI->getOperand(2).isKill(); NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, Src2, isKill2); + .addOperand(Dest), + Src.getReg(), Src.isKill(), Src2, isKill2); + + // Preserve undefness of the operands. + bool isUndef = MI->getOperand(1).isUndef(); + bool isUndef2 = MI->getOperand(2).isUndef(); + NewMI->getOperand(1).setIsUndef(isUndef); + NewMI->getOperand(3).setIsUndef(isUndef2); + if (LV && isKill2) LV->replaceKillInstruction(Src2, MI, NewMI); break; @@ -2041,10 +2085,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD64ri32_DB: case X86::ADD64ri8_DB: assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, MI->getOperand(2).getImm()); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) + .addOperand(Dest).addOperand(Src), + MI->getOperand(2).getImm()); break; case X86::ADD32ri: case X86::ADD32ri8: @@ -2052,10 +2095,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD32ri8_DB: { assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, MI->getOperand(2).getImm()); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addOperand(Dest).addOperand(Src), + MI->getOperand(2).getImm()); break; } case X86::ADD16ri: @@ -2065,10 +2107,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (DisableLEA16) return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, MI->getOperand(2).getImm()); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addOperand(Dest).addOperand(Src), + MI->getOperand(2).getImm()); break; } } @@ -2077,10 +2118,10 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (!NewMI) return 0; if (LV) { // Update live variables - if (isKill) - LV->replaceKillInstruction(Src, MI, NewMI); - if (isDead) - LV->replaceKillInstruction(Dest, MI, NewMI); + if (Src.isKill()) + LV->replaceKillInstruction(Src.getReg(), MI, NewMI); + if (Dest.isDead()) + LV->replaceKillInstruction(Dest.getReg(), MI, NewMI); } MFI->insert(MBBI, NewMI); // Insert the new inst @@ -2120,57 +2161,25 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { MI->getOperand(3).setImm(Size-Amt); return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); } - case X86::CMOVB16rr: - case X86::CMOVB32rr: - case X86::CMOVB64rr: - case X86::CMOVAE16rr: - case X86::CMOVAE32rr: - case X86::CMOVAE64rr: - case X86::CMOVE16rr: - case X86::CMOVE32rr: - case X86::CMOVE64rr: - case X86::CMOVNE16rr: - case X86::CMOVNE32rr: - case X86::CMOVNE64rr: - case X86::CMOVBE16rr: - case X86::CMOVBE32rr: - case X86::CMOVBE64rr: - case X86::CMOVA16rr: - case X86::CMOVA32rr: - case X86::CMOVA64rr: - case X86::CMOVL16rr: - case X86::CMOVL32rr: - case X86::CMOVL64rr: - case X86::CMOVGE16rr: - case X86::CMOVGE32rr: - case X86::CMOVGE64rr: - case X86::CMOVLE16rr: - case X86::CMOVLE32rr: - case X86::CMOVLE64rr: - case X86::CMOVG16rr: - case X86::CMOVG32rr: - case X86::CMOVG64rr: - case X86::CMOVS16rr: - case X86::CMOVS32rr: - case X86::CMOVS64rr: - case X86::CMOVNS16rr: - case X86::CMOVNS32rr: - case X86::CMOVNS64rr: - case X86::CMOVP16rr: - case X86::CMOVP32rr: - case X86::CMOVP64rr: - case X86::CMOVNP16rr: - case X86::CMOVNP32rr: - case X86::CMOVNP64rr: - case X86::CMOVO16rr: - case X86::CMOVO32rr: - case X86::CMOVO64rr: - case X86::CMOVNO16rr: - case X86::CMOVNO32rr: - case X86::CMOVNO64rr: { - unsigned Opc = 0; + case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr: + case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr: + case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr: + case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr: + case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr: + case X86::CMOVA16rr: case X86::CMOVA32rr: case X86::CMOVA64rr: + case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr: + case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr: + case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr: + case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr: + case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr: + case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr: + case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr: + case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr: + case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr: + case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: { + unsigned Opc; switch (MI->getOpcode()) { - default: break; + default: llvm_unreachable("Unreachable!"); case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break; case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break; case 
X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break; @@ -2279,7 +2288,7 @@ static X86::CondCode getCondFromSETOpc(unsigned Opc) { } /// getCondFromCmovOpc - return condition code of a CMov opcode. -static X86::CondCode getCondFromCMovOpc(unsigned Opc) { +X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) { switch (Opc) { default: return X86::COND_INVALID; case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm: @@ -2402,7 +2411,7 @@ static X86::CondCode getSwappedCondition(X86::CondCode CC) { /// whether it has memory operand. static unsigned getSETFromCond(X86::CondCode CC, bool HasMemoryOperand) { - static const unsigned Opc[16][2] = { + static const uint16_t Opc[16][2] = { { X86::SETAr, X86::SETAm }, { X86::SETAEr, X86::SETAEm }, { X86::SETBr, X86::SETBm }, @@ -2429,7 +2438,7 @@ static unsigned getSETFromCond(X86::CondCode CC, /// register size in bytes, and operand type. static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes, bool HasMemoryOperand) { - static const unsigned Opc[32][3] = { + static const uint16_t Opc[32][3] = { { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr }, { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr }, { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr }, @@ -2762,19 +2771,18 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, // SrcReg(GR64) -> DestReg(VR64) if (X86::GR64RegClass.contains(DestReg)) { - if (X86::VR128RegClass.contains(SrcReg)) { + if (X86::VR128RegClass.contains(SrcReg)) // Copy from a VR128 register to a GR64 register. return HasAVX ? X86::VMOVPQIto64rr : X86::MOVPQIto64rr; - } else if (X86::VR64RegClass.contains(SrcReg)) { + if (X86::VR64RegClass.contains(SrcReg)) // Copy from a VR64 register to a GR64 register. return X86::MOVSDto64rr; - } } else if (X86::GR64RegClass.contains(SrcReg)) { // Copy from a GR64 register to a VR128 register. if (X86::VR128RegClass.contains(DestReg)) return HasAVX ? X86::VMOV64toPQIrr : X86::MOV64toPQIrr; // Copy from a GR64 register to a VR64 register. - else if (X86::VR64RegClass.contains(DestReg)) + if (X86::VR64RegClass.contains(DestReg)) return X86::MOV64toSDrr; } @@ -2782,12 +2790,12 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, // SrcReg(GR32) -> DestReg(FR32) if (X86::GR32RegClass.contains(DestReg) && X86::FR32RegClass.contains(SrcReg)) - // Copy from a FR32 register to a GR32 register. - return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr; + // Copy from a FR32 register to a GR32 register. + return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr; if (X86::FR32RegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg)) - // Copy from a GR32 register to a FR32 register. - return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr; + // Copy from a GR32 register to a FR32 register. + return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr; return 0; } @@ -2798,7 +2806,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, bool KillSrc) const { // First deal with the normal symmetric copies. 
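A minimal sketch of the contract of the CopyToFromAsymmetricReg helper cleaned up above, paraphrased from the hunk rather than quoting the patch: it returns a cross-register-class move opcode, or 0 when no single instruction can perform the copy.

    // Sketch: opcode selection for one GR64 <-> VR128 pair; the real helper
    // (see the hunk above) also handles the VR64 and GR32/FR32 pairs.
    static unsigned crossCopyOpcode(unsigned DestReg, unsigned SrcReg,
                                    bool HasAVX) {
      if (X86::GR64RegClass.contains(DestReg) &&
          X86::VR128RegClass.contains(SrcReg))
        return HasAVX ? X86::VMOVPQIto64rr : X86::MOVPQIto64rr;
      return 0; // remaining pairs elided in this sketch
    }
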
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); - unsigned Opc = 0; + unsigned Opc; if (X86::GR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MOV64rr; else if (X86::GR32RegClass.contains(DestReg, SrcReg)) @@ -2837,7 +2845,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(X86::PUSHF64)); BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); return; - } else if (X86::GR32RegClass.contains(DestReg)) { + } + if (X86::GR32RegClass.contains(DestReg)) { BuildMI(MBB, MI, DL, get(X86::PUSHF32)); BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); return; @@ -2849,7 +2858,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(KillSrc)); BuildMI(MBB, MI, DL, get(X86::POPF64)); return; - } else if (X86::GR32RegClass.contains(SrcReg)) { + } + if (X86::GR32RegClass.contains(SrcReg)) { BuildMI(MBB, MI, DL, get(X86::PUSH32r)) .addReg(SrcReg, getKillRegState(KillSrc)); BuildMI(MBB, MI, DL, get(X86::POPF32)); @@ -3139,11 +3149,19 @@ inline static bool isDefConvertible(MachineInstr *MI) { case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr: case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm: case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm: + case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r: + case X86::DEC64m: case X86::DEC32m: case X86::DEC16m: case X86::DEC8m: + case X86::DEC64_32r: case X86::DEC64_16r: + case X86::DEC64_32m: case X86::DEC64_16m: case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri: case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8: case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr: case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm: case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm: + case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r: + case X86::INC64m: case X86::INC32m: case X86::INC16m: case X86::INC8m: + case X86::INC64_32r: case X86::INC64_16r: + case X86::INC64_32m: case X86::INC64_16m: case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri: case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8: case X86::AND8ri: case X86::AND64rr: case X86::AND32rr: @@ -3193,7 +3211,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, return false; // There is no use of the destination register, we can replace SUB with CMP. switch (CmpInstr->getOpcode()) { - default: llvm_unreachable(0); + default: llvm_unreachable("Unreachable!"); case X86::SUB64rm: NewOpcode = X86::CMP64rm; break; case X86::SUB32rm: NewOpcode = X86::CMP32rm; break; case X86::SUB16rm: NewOpcode = X86::CMP16rm; break; @@ -3318,7 +3336,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, if (OldCC != X86::COND_INVALID) OpcIsSET = true; else - OldCC = getCondFromCMovOpc(Instr.getOpcode()); + OldCC = X86::getCondFromCMovOpc(Instr.getOpcode()); } if (OldCC == X86::COND_INVALID) return false; } @@ -3383,12 +3401,14 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, Sub->getParent()->insert(MachineBasicBlock::iterator(Sub), Movr0Inst); } - // Make sure Sub instruction defines EFLAGS. + // Make sure Sub instruction defines EFLAGS and mark the def live. 
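The SUB-to-CMP rewrite this hunk fixes up can be restated compactly. The opcode pairs below are taken from the switch earlier in the hunk; the surrounding function is an illustrative sketch, not the patch's code.

    // A CMP is a SUB whose result is discarded, so when the SUB's
    // destination register has no further uses the compare can be replaced
    // and the SUB's EFLAGS def reused (hence the live-def fix below).
    static unsigned subToCmpOpcode(unsigned Opc) {
      switch (Opc) {
      case X86::SUB64rm: return X86::CMP64rm;
      case X86::SUB32rm: return X86::CMP32rm;
      case X86::SUB16rm: return X86::CMP16rm;
      default:           return 0; // full switch covers more forms
      }
    }
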
+ unsigned LastOperand = Sub->getNumOperands() - 1; assert(Sub->getNumOperands() >= 2 && - Sub->getOperand(Sub->getNumOperands()-1).isReg() && - Sub->getOperand(Sub->getNumOperands()-1).getReg() == X86::EFLAGS && + Sub->getOperand(LastOperand).isReg() && + Sub->getOperand(LastOperand).getReg() == X86::EFLAGS && "EFLAGS should be the last operand of SUB, ADD, OR, XOR, AND"); - Sub->getOperand(Sub->getNumOperands()-1).setIsDef(true); + Sub->getOperand(LastOperand).setIsDef(true); + Sub->getOperand(LastOperand).setIsDead(false); CmpInstr->eraseFromParent(); // Modify the condition code of instructions in OpsToUpdate. @@ -3497,10 +3517,25 @@ static bool Expand2AddrUndef(MachineInstr *MI, const MCInstrDesc &Desc) { bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); switch (MI->getOpcode()) { + case X86::SETB_C8r: + return Expand2AddrUndef(MI, get(X86::SBB8rr)); + case X86::SETB_C16r: + return Expand2AddrUndef(MI, get(X86::SBB16rr)); + case X86::SETB_C32r: + return Expand2AddrUndef(MI, get(X86::SBB32rr)); + case X86::SETB_C64r: + return Expand2AddrUndef(MI, get(X86::SBB64rr)); case X86::V_SET0: case X86::FsFLD0SS: case X86::FsFLD0SD: return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr)); + case X86::AVX_SET0: + assert(HasAVX && "AVX not supported"); + return Expand2AddrUndef(MI, get(X86::VXORPSYrr)); + case X86::V_SETALLONES: + return Expand2AddrUndef(MI, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr)); + case X86::AVX2_SETALLONES: + return Expand2AddrUndef(MI, get(X86::VPCMPEQDYrr)); case X86::TEST8ri_NOREX: MI->setDesc(get(X86::TEST8ri)); return true; @@ -3614,14 +3649,16 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, OpcodeTablePtr = &RegOp2MemOpTable2Addr; isTwoAddrFold = true; } else if (i == 0) { // If operand 0 - if (MI->getOpcode() == X86::MOV64r0) - NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI); - else if (MI->getOpcode() == X86::MOV32r0) - NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); - else if (MI->getOpcode() == X86::MOV16r0) - NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI); - else if (MI->getOpcode() == X86::MOV8r0) - NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); + unsigned Opc = 0; + switch (MI->getOpcode()) { + default: break; + case X86::MOV64r0: Opc = X86::MOV64mi32; break; + case X86::MOV32r0: Opc = X86::MOV32mi; break; + case X86::MOV16r0: Opc = X86::MOV16mi; break; + case X86::MOV8r0: Opc = X86::MOV8mi; break; + } + if (Opc) + NewMI = MakeM0Inst(*this, Opc, MOs, MI); if (NewMI) return NewMI; @@ -3799,7 +3836,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Unless optimizing for size, don't fold to avoid partial // register update stalls - if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + if (!MF.getFunction()->getFnAttributes(). + hasAttribute(Attributes::OptimizeForSize) && hasPartialRegUpdate(MI->getOpcode())) return 0; @@ -3840,7 +3878,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Unless optimizing for size, don't fold to avoid partial // register update stalls - if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + if (!MF.getFunction()->getFnAttributes(). 
+ hasAttribute(Attributes::OptimizeForSize) && hasPartialRegUpdate(MI->getOpcode())) return 0; @@ -3850,15 +3889,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Alignment = (*LoadMI->memoperands_begin())->getAlignment(); else switch (LoadMI->getOpcode()) { - case X86::AVX_SET0PSY: - case X86::AVX_SET0PDY: case X86::AVX2_SETALLONES: - case X86::AVX2_SET0: + case X86::AVX_SET0: Alignment = 32; break; case X86::V_SET0: case X86::V_SETALLONES: - case X86::AVX_SETALLONES: Alignment = 16; break; case X86::FsFLD0SD: @@ -3894,11 +3930,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, switch (LoadMI->getOpcode()) { case X86::V_SET0: case X86::V_SETALLONES: - case X86::AVX_SET0PSY: - case X86::AVX_SET0PDY: - case X86::AVX_SETALLONES: case X86::AVX2_SETALLONES: - case X86::AVX2_SET0: + case X86::AVX_SET0: case X86::FsFLD0SD: case X86::FsFLD0SS: { // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. @@ -3930,15 +3963,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Ty = Type::getFloatTy(MF.getFunction()->getContext()); else if (Opc == X86::FsFLD0SD) Ty = Type::getDoubleTy(MF.getFunction()->getContext()); - else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY) - Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8); - else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX2_SET0) + else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0) Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8); else Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); - bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES || - Opc == X86::AVX2_SETALLONES); + bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES); const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) : Constant::getNullValue(Ty); unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); @@ -4013,6 +4043,8 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, OpcodeTablePtr = &RegOp2MemOpTable1; } else if (OpNum == 2) { OpcodeTablePtr = &RegOp2MemOpTable2; + } else if (OpNum == 3) { + OpcodeTablePtr = &RegOp2MemOpTable3; } if (OpcodeTablePtr && OpcodeTablePtr->count(Opc)) @@ -4102,7 +4134,6 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, getUndefRegState(MO.isUndef())); } // Change CMP32ri r, 0 back to TEST32rr r, r, etc. - unsigned NewOpc = 0; switch (DataMI->getOpcode()) { default: break; case X86::CMP64ri32: @@ -4115,8 +4146,9 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, MachineOperand &MO0 = DataMI->getOperand(0); MachineOperand &MO1 = DataMI->getOperand(1); if (MO1.getImm() == 0) { + unsigned NewOpc; switch (DataMI->getOpcode()) { - default: break; + default: llvm_unreachable("Unreachable!"); case X86::CMP64ri8: case X86::CMP64ri32: NewOpc = X86::TEST64rr; break; case X86::CMP32ri8: diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h index b6f69af..260f054 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h @@ -61,6 +61,9 @@ namespace X86 { // Turn condition code into conditional branch opcode. unsigned GetCondBranchFromCond(CondCode CC); + // Turn CMov opcode into condition code. + CondCode getCondFromCMovOpc(unsigned Opc); + /// GetOppositeBranchCondition - Return the inverse of the specified cond, /// e.g. turning COND_E to COND_NE. 
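A minimal usage sketch for the getCondFromCMovOpc helper that this header change exports, assuming a MachineInstr *MI in scope (illustrative, not from the patch):

    // Recover the condition encoded in a CMOV opcode; X86::COND_INVALID is
    // returned for any non-CMOV opcode.
    X86::CondCode CC = X86::getCondFromCMovOpc(MI->getOpcode());
    if (CC != X86::COND_INVALID)
      CC = X86::GetOppositeBranchCondition(CC); // e.g. COND_E -> COND_NE
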
CondCode GetOppositeBranchCondition(X86::CondCode CC); diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td index d293156..650fa95 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td @@ -114,7 +114,7 @@ def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>; def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER, - [SDNPHasChain]>; + [SDNPHasChain,SDNPSideEffect]>; def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER, [SDNPHasChain]>; def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER, @@ -216,6 +216,14 @@ def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR, def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, [SDNPHasChain]>; +def X86eh_sjlj_setjmp : SDNode<"X86ISD::EH_SJLJ_SETJMP", + SDTypeProfile<1, 1, [SDTCisInt<0>, + SDTCisPtrTy<1>]>, + [SDNPHasChain, SDNPSideEffect]>; +def X86eh_sjlj_longjmp : SDNode<"X86ISD::EH_SJLJ_LONGJMP", + SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPSideEffect]>; + def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; @@ -397,7 +405,7 @@ def i64mem_TC : Operand<i64> { let OperandType = "OPERAND_PCREL", ParserMatchClass = X86AbsMemAsmOperand, - PrintMethod = "print_pcrel_imm" in { + PrintMethod = "printPCRelImm" in { def i32imm_pcrel : Operand<i32>; def i16imm_pcrel : Operand<i16>; @@ -418,7 +426,7 @@ def SSECC : Operand<i8> { } def AVXCC : Operand<i8> { - let PrintMethod = "printSSECC"; + let PrintMethod = "printAVXCC"; let OperandType = "OPERAND_IMMEDIATE"; } @@ -499,7 +507,7 @@ def i64i32imm : Operand<i64> { // 64-bits but only 32 bits are significant, and those bits are treated as being // pc relative. 
def i64i32imm_pcrel : Operand<i64> { - let PrintMethod = "print_pcrel_imm"; + let PrintMethod = "printPCRelImm"; let ParserMatchClass = X86AbsMemAsmOperand; let OperandType = "OPERAND_PCREL"; } @@ -552,14 +560,21 @@ def HasMMX : Predicate<"Subtarget->hasMMX()">; def Has3DNow : Predicate<"Subtarget->has3DNow()">; def Has3DNowA : Predicate<"Subtarget->has3DNowA()">; def HasSSE1 : Predicate<"Subtarget->hasSSE1()">; +def UseSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">; def HasSSE2 : Predicate<"Subtarget->hasSSE2()">; +def UseSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">; def HasSSE3 : Predicate<"Subtarget->hasSSE3()">; +def UseSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">; def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">; +def UseSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">; def HasSSE41 : Predicate<"Subtarget->hasSSE41()">; +def UseSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">; def HasSSE42 : Predicate<"Subtarget->hasSSE42()">; +def UseSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">; def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasAVX2 : Predicate<"Subtarget->hasAVX2()">; +def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; @@ -574,6 +589,7 @@ def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">; def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">; def HasBMI : Predicate<"Subtarget->hasBMI()">; def HasBMI2 : Predicate<"Subtarget->hasBMI2()">; +def HasRTM : Predicate<"Subtarget->hasRTM()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; @@ -1259,28 +1275,46 @@ def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2), // Atomic support // - // Atomic swap. These are just normal xchg instructions. But since a memory // operand is referenced, the atomicity is ensured. +multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag, + InstrItinClass itin> { + let Constraints = "$val = $dst" in { + def #NAME#8rm : I<opc8, MRMSrcMem, (outs GR8:$dst), + (ins GR8:$val, i8mem:$ptr), + !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"), + [(set + GR8:$dst, + (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))], + itin>; + def #NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst), + (ins GR16:$val, i16mem:$ptr), + !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"), + [(set + GR16:$dst, + (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))], + itin>, OpSize; + def #NAME#32rm : I<opc, MRMSrcMem, (outs GR32:$dst), + (ins GR32:$val, i32mem:$ptr), + !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"), + [(set + GR32:$dst, + (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))], + itin>; + def #NAME#64rm : RI<opc, MRMSrcMem, (outs GR64:$dst), + (ins GR64:$val, i64mem:$ptr), + !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"), + [(set + GR64:$dst, + (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))], + itin>; + } +} + +defm XCHG : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap", IIC_XCHG_MEM>; + +// Swap between registers. 
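At the source level, the ATOMIC_SWAP memory forms consolidated into the multiclass above correspond to a plain atomic exchange; a brief C++ illustration (hypothetical function name):

    #include <atomic>
    // A seq_cst swap compiles to xchg with a memory operand on x86; the
    // lock prefix is implicit for xchg, so no extra atomicity machinery
    // is needed in the multiclass.
    int swap_int(std::atomic<int> &a, int v) {
      return a.exchange(v); // selects the XCHG32rm form defined above
    }
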
let Constraints = "$val = $dst" in { -def XCHG8rm : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), - "xchg{b}\t{$val, $ptr|$ptr, $val}", - [(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))], - IIC_XCHG_MEM>; -def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst),(ins GR16:$val, i16mem:$ptr), - "xchg{w}\t{$val, $ptr|$ptr, $val}", - [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))], - IIC_XCHG_MEM>, - OpSize; -def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst),(ins GR32:$val, i32mem:$ptr), - "xchg{l}\t{$val, $ptr|$ptr, $val}", - [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))], - IIC_XCHG_MEM>; -def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst),(ins GR64:$val,i64mem:$ptr), - "xchg{q}\t{$val, $ptr|$ptr, $val}", - [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))], - IIC_XCHG_MEM>; - def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src), "xchg{b}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>; def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src), @@ -1291,6 +1325,7 @@ def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src), "xchg{q}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>; } +// Swap between EAX and other registers. def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src), "xchg{w}\t{$src, %ax|AX, $src}", [], IIC_XCHG_REG>, OpSize; def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src), @@ -1672,6 +1707,8 @@ include "X86Instr3DNow.td" include "X86InstrVMX.td" include "X86InstrSVM.td" +include "X86InstrTSX.td" + // System instructions. include "X86InstrSystem.td" diff --git a/contrib/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm/lib/Target/X86/X86InstrMMX.td index c8f40bb..127af6f 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrMMX.td +++ b/contrib/llvm/lib/Target/X86/X86InstrMMX.td @@ -118,11 +118,11 @@ let Constraints = "$src1 = $dst" in { /// Unary MMX instructions requiring SSSE3. multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr, Intrinsic IntId64, OpndItins itins> { - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), + def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>; - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), + def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR64:$dst, (IntId64 (bitconvert (memopmmx addr:$src))))], @@ -134,11 +134,11 @@ let ImmT = NoImm, Constraints = "$src1 = $dst" in { multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr, Intrinsic IntId64, OpndItins itins> { let isCommutable = 0 in - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), + def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>; - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), + def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, @@ -149,11 +149,11 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr, /// PALIGN MMX instructions (require SSSE3). 
multiclass ssse3_palign_mm<string asm, Intrinsic IntId> { - def R64irr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), + def R64irr : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>; - def R64irm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), + def R64irm : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR64:$dst, (IntId VR64:$src1, @@ -163,12 +163,10 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId> { multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, string asm, OpndItins itins, Domain d> { - def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [(set DstRC:$dst, (Int SrcRC:$src))], - itins.rr, d>; - def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [(set DstRC:$dst, (Int (ld_frag addr:$src)))], - itins.rm, d>; + def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, + [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr, d>; + def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, + [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm, d>; } multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, @@ -209,8 +207,14 @@ def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), let mayStore = 1 in def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>; -def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src), - "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_REG_MM>; + +// Low word of MMX to GPR. 
+def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1, + [SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>; +def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, + (MMX_X86movd2w (x86mmx VR64:$src)))], IIC_MMX_MOV_REG_MM>; let neverHasSideEffects = 1 in def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), @@ -243,29 +247,30 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), [(store (x86mmx VR64:$src), addr:$dst)], IIC_MMX_MOVQ_RM>; -def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), - (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, - (x86mmx (bitconvert - (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))))))], - IIC_MMX_MOVQ_RR>; - -def MMX_MOVQ2DQrr : S2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst), - (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2i64 (scalar_to_vector - (i64 (bitconvert (x86mmx VR64:$src))))))], - IIC_MMX_MOVQ_RR>; +def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), + (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}", + [(set VR64:$dst, + (x86mmx (bitconvert + (i64 (vector_extract (v2i64 VR128:$src), + (iPTR 0))))))], + IIC_MMX_MOVQ_RR>; + +def MMX_MOVQ2DQrr : MMXS2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst), + (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2i64 + (scalar_to_vector + (i64 (bitconvert (x86mmx VR64:$src))))))], + IIC_MMX_MOVQ_RR>; let neverHasSideEffects = 1 in -def MMX_MOVQ2FR64rr: S2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst), - (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [], - IIC_MMX_MOVQ_RR>; +def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst), + (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", + [], IIC_MMX_MOVQ_RR>; -def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), - (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", [], - IIC_MMX_MOVQ_RR>; +def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), + (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", + [], IIC_MMX_MOVQ_RR>; def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movntq\t{$src, $dst|$dst, $src}", @@ -577,6 +582,7 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), IIC_MMX_MASKMOV>; // 64-bit bit convert. +let Predicates = [HasSSE2] in { def : Pat<(x86mmx (bitconvert (i64 GR64:$src))), (MMX_MOVD64to64rr GR64:$src)>; def : Pat<(i64 (bitconvert (x86mmx VR64:$src))), @@ -585,5 +591,6 @@ def : Pat<(f64 (bitconvert (x86mmx VR64:$src))), (MMX_MOVQ2FR64rr VR64:$src)>; def : Pat<(x86mmx (bitconvert (f64 FR64:$src))), (MMX_MOVFR642Qrr FR64:$src)>; +} diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td index 20dc81e..6f48d7e 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td +++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td @@ -251,35 +251,37 @@ def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), // A 128-bit subvector extract from the first 256-bit vector position // is a subregister copy that needs no instruction. 
-def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (i32 0))), +def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (iPTR 0))), (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>; -def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (i32 0))), +def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (iPTR 0))), (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>; -def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (i32 0))), +def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (iPTR 0))), (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>; -def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (i32 0))), +def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (iPTR 0))), (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>; -def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (i32 0))), +def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))), (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>; -def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (i32 0))), +def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))), (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>; // A 128-bit subvector insert to the first 256-bit vector position // is a subregister copy that needs no instruction. -def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (i32 0)), +let AddedComplexity = 25 in { // to give priority over vinsertf128rm +def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)), (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; -def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (i32 0)), +def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)), (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; -def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (i32 0)), +def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)), (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; -def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (i32 0)), +def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)), (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; -def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (i32 0)), +def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (iPTR 0)), (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; -def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)), +def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (iPTR 0)), (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; +} // Implicitly promote a 32-bit scalar to a vector. def : Pat<(v4f32 (scalar_to_vector FR32:$src)), @@ -362,7 +364,7 @@ let Predicates = [HasAVX] in { def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>; } -// Alias instructions that map fld0 to pxor for sse. +// Alias instructions that map fld0 to xorps for sse or vxorps for avx. // This is expanded by ExpandPostRAPseudos. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isPseudo = 1 in { @@ -382,11 +384,11 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. 
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1, neverHasSideEffects = 1 in { -def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", []>; + isPseudo = 1 in { +def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4f32 immAllZerosV))]>; } -def : Pat<(v4f32 immAllZerosV), (V_SET0)>; def : Pat<(v2f64 immAllZerosV), (V_SET0)>; def : Pat<(v4i32 immAllZerosV), (V_SET0)>; def : Pat<(v2i64 immAllZerosV), (V_SET0)>; @@ -394,35 +396,29 @@ def : Pat<(v8i16 immAllZerosV), (V_SET0)>; def : Pat<(v16i8 immAllZerosV), (V_SET0)>; -// The same as done above but for AVX. The 256-bit ISA does not support PI, +// The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI, // and doesn't need it because on sandy bridge the register is set to zero // at the rename stage without using any execution unit, so SET0PSY // and SET0PDY can be used for vector int instructions without penalty -// FIXME: Change encoding to pseudo! This is blocked right now by the x86 -// JIT implementatioan, it does not expand the instructions below like -// X86MCInstLower does. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1 in { -let Predicates = [HasAVX] in { -def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", - [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V; -def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", - [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V; -} -let Predicates = [HasAVX2], neverHasSideEffects = 1 in -def AVX2_SET0 : PDI<0xef, MRMInitReg, (outs VR256:$dst), (ins), "", - []>, VEX_4V; + isPseudo = 1, Predicates = [HasAVX] in { +def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "", + [(set VR256:$dst, (v8f32 immAllZerosV))]>; } -let Predicates = [HasAVX2], AddedComplexity = 5 in { - def : Pat<(v4i64 immAllZerosV), (AVX2_SET0)>; - def : Pat<(v8i32 immAllZerosV), (AVX2_SET0)>; - def : Pat<(v16i16 immAllZerosV), (AVX2_SET0)>; - def : Pat<(v32i8 immAllZerosV), (AVX2_SET0)>; +let Predicates = [HasAVX] in + def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>; + +let Predicates = [HasAVX2] in { + def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>; + def : Pat<(v8i32 immAllZerosV), (AVX_SET0)>; + def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>; + def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>; } -// AVX has no support for 256-bit integer instructions, but since the 128-bit +// AVX1 has no support for 256-bit integer instructions, but since the 128-bit // VPXOR instruction writes zero to its upper part, it's safe build zeros. +let Predicates = [HasAVX1Only] in { def : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>; def : Pat<(bc_v32i8 (v8f32 immAllZerosV)), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>; @@ -438,22 +434,17 @@ def : Pat<(bc_v8i32 (v8f32 immAllZerosV)), def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>; def : Pat<(bc_v4i64 (v8f32 immAllZerosV)), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>; +} // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-ones value if folding it would be beneficial. -// FIXME: Change encoding to pseudo! This is blocked right now by the x86 -// JIT implementation, it does not expand the instructions below like -// X86MCInstLower does. 
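The zeroing pseudos reworked above lean on the xor zero idiom; a small illustration of what AVX_SET0 ultimately becomes (hypothetical helper, standard intrinsics):

    #include <immintrin.h>
    // vxorps %ymm0, %ymm0, %ymm0: the recognized zero idiom, resolved at
    // register rename on Sandy Bridge with no execution-unit cost, per the
    // comment above.
    __m256 zero_ymm() {
      return _mm256_setzero_ps();
    }
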
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1, ExeDomain = SSEPackedInt in { - let Predicates = [HasAVX] in - def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V; - def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllOnesV))]>; + isPseudo = 1 in { + def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4i32 immAllOnesV))]>; let Predicates = [HasAVX2] in - def AVX2_SETALLONES : PDI<0x76, MRMInitReg, (outs VR256:$dst), (ins), "", - [(set VR256:$dst, (v8i32 immAllOnesV))]>, VEX_4V; + def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "", + [(set VR256:$dst, (v8i32 immAllOnesV))]>; } @@ -605,27 +596,27 @@ let Predicates = [HasAVX] in { // Represent the same patterns above but in the form they appear for // 256-bit types def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, - (v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))), + (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, - (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))), + (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, - (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))), + (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>; } def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, - (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))), + (v4f32 (scalar_to_vector FR32:$src)), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)), sub_xmm)>; def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, - (v2f64 (scalar_to_vector FR64:$src)), (i32 0)))), + (v2f64 (scalar_to_vector FR64:$src)), (iPTR 0)))), (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)), sub_xmm)>; def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, - (v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))), + (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>; // Move low f64 and clear high bits. @@ -704,7 +695,7 @@ let Predicates = [HasAVX] in { (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; } -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { let AddedComplexity = 15 in { // Move scalar to XMM zero-extended, zeroing a VR128 then do a // MOVSS to the lower bits. @@ -738,7 +729,7 @@ let Predicates = [HasSSE1] in { (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { let AddedComplexity = 15 in { // Move scalar to XMM zero-extended, zeroing a VR128 then do a // MOVSD to the lower bits. 
@@ -822,16 +813,16 @@ defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, - TB, VEX; + TB, VEX, VEX_L; defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, - TB, OpSize, VEX; + TB, OpSize, VEX, VEX_L; defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups", SSEPackedSingle, SSE_MOVU_ITINS>, - TB, VEX; + TB, VEX, VEX_L; defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, - TB, OpSize, VEX; + TB, OpSize, VEX, VEX_L; defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, TB; @@ -864,19 +855,19 @@ def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore256 (v8f32 VR256:$src), addr:$dst)], - IIC_SSE_MOVA_P_MR>, VEX; + IIC_SSE_MOVA_P_MR>, VEX, VEX_L; def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movapd\t{$src, $dst|$dst, $src}", [(alignedstore256 (v4f64 VR256:$src), addr:$dst)], - IIC_SSE_MOVA_P_MR>, VEX; + IIC_SSE_MOVA_P_MR>, VEX, VEX_L; def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movups\t{$src, $dst|$dst, $src}", [(store (v8f32 VR256:$src), addr:$dst)], - IIC_SSE_MOVU_P_MR>, VEX; + IIC_SSE_MOVU_P_MR>, VEX, VEX_L; def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v4f64 VR256:$src), addr:$dst)], - IIC_SSE_MOVU_P_MR>, VEX; + IIC_SSE_MOVU_P_MR>, VEX, VEX_L; // For disassembler let isCodeGenOnly = 1 in { @@ -899,33 +890,33 @@ let isCodeGenOnly = 1 in { def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movaps\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX; + IIC_SSE_MOVA_P_RR>, VEX, VEX_L; def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movapd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX; + IIC_SSE_MOVA_P_RR>, VEX, VEX_L; def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movups\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX; + IIC_SSE_MOVU_P_RR>, VEX, VEX_L; def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX; + IIC_SSE_MOVU_P_RR>, VEX, VEX_L; } let Predicates = [HasAVX] in { def : Pat<(v8i32 (X86vzmovl - (insert_subvector undef, (v4i32 VR128:$src), (i32 0)))), + (insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; def : Pat<(v4i64 (X86vzmovl - (insert_subvector undef, (v2i64 VR128:$src), (i32 0)))), + (insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; def : Pat<(v8f32 (X86vzmovl - (insert_subvector undef, (v4f32 VR128:$src), (i32 0)))), + (insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; def : Pat<(v4f64 (X86vzmovl - (insert_subvector undef, (v2f64 VR128:$src), (i32 0)))), + (insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; } @@ -975,10 +966,10 @@ let Predicates = [HasAVX] in { (VMOVUPDmr addr:$dst, 
VR128:$src)>; } -let Predicates = [HasSSE1] in +let Predicates = [UseSSE1] in def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src), (MOVUPSmr addr:$dst, VR128:$src)>; -let Predicates = [HasSSE2] in +let Predicates = [UseSSE2] in def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src), (MOVUPDmr addr:$dst, VR128:$src)>; @@ -1028,12 +1019,52 @@ let Predicates = [HasAVX] in { (VMOVUPSYmr addr:$dst, VR256:$src)>; def : Pat<(store (v32i8 VR256:$src), addr:$dst), (VMOVUPSYmr addr:$dst, VR256:$src)>; + + // Special patterns for storing subvector extracts of lower 128-bits + // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr + def : Pat<(alignedstore (v2f64 (extract_subvector + (v4f64 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(alignedstore (v4f32 (extract_subvector + (v8f32 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(alignedstore (v2i64 (extract_subvector + (v4i64 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(alignedstore (v4i32 (extract_subvector + (v8i32 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(alignedstore (v8i16 (extract_subvector + (v16i16 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(alignedstore (v16i8 (extract_subvector + (v32i8 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + + def : Pat<(store (v2f64 (extract_subvector + (v4f64 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVUPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(store (v4f32 (extract_subvector + (v8f32 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVUPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(store (v2i64 (extract_subvector + (v4i64 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVUPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(store (v4i32 (extract_subvector + (v8i32 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(store (v8i16 (extract_subvector + (v16i16 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(store (v16i8 (extract_subvector + (v32i8 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; } // Use movaps / movups for SSE integer load / store (one byte shorter). // The instructions selected below are then converted to MOVDQA/MOVDQU // during the SSE domain pass. 
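The subvector-store patterns added above encode a cheap choice: seen from source code, extracting the low 128 bits is only a cast (a subregister read), so the store can be a plain vmovaps/vmovups. An illustrative example with a hypothetical function name:

    #include <immintrin.h>
    // Store the low half of a 256-bit value; p must be 16-byte aligned.
    // With the patterns above this becomes vmovaps %xmm, (%rdi) rather
    // than vextractf128 $0, %ymm, (%rdi).
    void store_low_half(__m256 v, float *p) {
      _mm_store_ps(p, _mm256_castps256_ps128(v));
    }
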
-let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { def : Pat<(alignedloadv2i64 addr:$src), (MOVAPSrm addr:$src)>; def : Pat<(loadv2i64 addr:$src), @@ -1180,7 +1211,7 @@ let Predicates = [HasAVX] in { (VMOVLPDmr addr:$src1, VR128:$src2)>; } -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)), (iPTR 0))), addr:$src1), @@ -1205,7 +1236,7 @@ let Predicates = [HasSSE1] in { (MOVLPSmr addr:$src1, VR128:$src2)>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { // Shuffle with MOVLPD def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>; @@ -1271,7 +1302,7 @@ let Predicates = [HasAVX] in { (VMOVHPSrm VR128:$src1, addr:$src2)>; // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem - // is during lowering, where it's not possible to recognize the load fold + // is during lowering, where it's not possible to recognize the load fold // cause it has two uses through a bitcast. One use disappears at isel time // and the fold opportunity reappears. def : Pat<(v2f64 (X86Unpckl VR128:$src1, @@ -1279,7 +1310,7 @@ let Predicates = [HasAVX] in { (VMOVHPDrm VR128:$src1, addr:$src2)>; } -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { // MOVHPS patterns def : Pat<(X86Movlhps VR128:$src1, (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), @@ -1289,9 +1320,9 @@ let Predicates = [HasSSE1] in { (MOVHPSrm VR128:$src1, addr:$src2)>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem - // is during lowering, where it's not possible to recognize the load fold + // is during lowering, where it's not possible to recognize the load fold // cause it has two uses through a bitcast. One use disappears at isel time // and the fold opportunity reappears. 
def : Pat<(v2f64 (X86Unpckl VR128:$src1, @@ -1346,7 +1377,7 @@ let Predicates = [HasAVX] in { (VMOVHLPSrr VR128:$src1, VR128:$src2)>; } -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { // MOVLHPS patterns def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), (MOVLHPSrr VR128:$src1, VR128:$src2)>; @@ -1456,7 +1487,7 @@ def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}", def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}", (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>; -let Predicates = [HasAVX], AddedComplexity = 1 in { +let Predicates = [HasAVX] in { def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), @@ -1628,12 +1659,12 @@ defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem, "vcvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle, SSE_CVT_PS>, - TB, VEX, Requires<[HasAVX]>; + TB, VEX, VEX_L, Requires<[HasAVX]>; defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, "cvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle, SSE_CVT_PS>, - TB, Requires<[HasSSE2]>; + TB, Requires<[UseSSE2]>; /// SSE 2 Only @@ -1663,7 +1694,7 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), [(set FR32:$dst, (fround (loadf64 addr:$src)))], IIC_SSE_CVT_Scalar_RM>, XD, - Requires<[HasSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>; def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -1684,13 +1715,13 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XD, Requires<[HasSSE2]>; + IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>; def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))], - IIC_SSE_CVT_Scalar_RM>, XD, Requires<[HasSSE2]>; + IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>; } // Convert scalar single to scalar double @@ -1709,30 +1740,28 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>; } -let AddedComplexity = 1 in { // give AVX priority - def : Pat<(f64 (fextend FR32:$src)), - (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>; - def : Pat<(fextend (loadf32 addr:$src)), - (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>; +def : Pat<(f64 (fextend FR32:$src)), + (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>; +def : Pat<(fextend (loadf32 addr:$src)), + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>; - def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[HasAVX, OptForSize]>; - def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>, - Requires<[HasAVX, OptForSpeed]>; -} // AddedComplexity = 1 +def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX, OptForSize]>; +def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>, + Requires<[HasAVX, OptForSpeed]>; def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (fextend FR32:$src))], IIC_SSE_CVT_Scalar_RR>, XS, - 
Requires<[HasSSE2]>; + Requires<[UseSSE2]>; def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (extloadf32 addr:$src))], IIC_SSE_CVT_Scalar_RM>, XS, - Requires<[HasSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>; // extload f32 -> f64. This matches load+fextend because we have a hack in // the isel (PreprocessForFPConvert) that can introduce loads after dag @@ -1740,9 +1769,9 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), // Since these loads aren't folded into the fextend, we have to match it // explicitly here. def : Pat<(fextend (loadf32 addr:$src)), - (CVTSS2SDrm addr:$src)>, Requires<[HasSSE2]>; + (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2]>; def : Pat<(extloadf32 addr:$src), - (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>; + (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>; def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -1762,13 +1791,13 @@ def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, Requires<[HasSSE2]>; + IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>; def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], - IIC_SSE_CVT_Scalar_RM>, XS, Requires<[HasSSE2]>; + IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>; } // Convert packed single/double fp to doubleword @@ -1785,12 +1814,12 @@ def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2dq_256 VR256:$src))], - IIC_SSE_CVT_PS_RR>, VEX; + IIC_SSE_CVT_PS_RR>, VEX, VEX_L; def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; + IIC_SSE_CVT_PS_RM>, VEX, VEX_L; def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], @@ -1824,7 +1853,7 @@ def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX; + (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L; def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -1860,12 +1889,12 @@ def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 VR256:$src))], - IIC_SSE_CVT_PS_RR>, VEX; + IIC_SSE_CVT_PS_RR>, VEX, VEX_L; def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; + IIC_SSE_CVT_PS_RM>, VEX, VEX_L; def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", @@ -1904,7 +1933,7 @@ let Predicates = [HasAVX] in { (VCVTTPS2DQYrm addr:$src)>; } -let 
Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), (CVTDQ2PSrr VR128:$src)>; def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), @@ -1945,7 +1974,7 @@ def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvtt_pd2dq_256 VR256:$src))], - IIC_SSE_CVT_PD_RR>, VEX; + IIC_SSE_CVT_PD_RR>, VEX, VEX_L; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -1978,31 +2007,31 @@ def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], IIC_SSE_CVT_PD_RR>, TB, VEX; -let neverHasSideEffects = 1, mayLoad = 1 in def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RM>, TB, VEX; + "vcvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], + IIC_SSE_CVT_PD_RM>, TB, VEX; def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2_pd_256 VR128:$src))], - IIC_SSE_CVT_PD_RR>, TB, VEX; + IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L; def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, TB, VEX; + IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], IIC_SSE_CVT_PD_RR>, TB; -let neverHasSideEffects = 1, mayLoad = 1 in def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RM>, TB; + "cvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], + IIC_SSE_CVT_PD_RM>, TB; } // Convert Packed DW Integers to Packed Double FP @@ -2019,11 +2048,11 @@ def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtdq2_pd_256 - (bitconvert (memopv2i64 addr:$src))))]>, VEX; + (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L; def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, - (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX; + (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L; } let neverHasSideEffects = 1, mayLoad = 1 in @@ -2066,7 +2095,7 @@ def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvt_pd2_ps_256 VR256:$src))], - IIC_SSE_CVT_PD_RR>, VEX; + IIC_SSE_CVT_PD_RR>, VEX, VEX_L; def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2096,6 +2125,10 @@ let Predicates = [HasAVX] in { (VCVTDQ2PSYrm addr:$src)>; // Match fround and fextend for 128/256-bit conversions + def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))), + (VCVTPD2PSrr VR128:$src)>; + def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))), + (VCVTPD2PSXrm addr:$src)>; 
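The new X86vfpround patterns map straight onto the packed double-to-single conversion; at the intrinsics level (illustrative):

    #include <emmintrin.h>
    // cvtpd2ps: round two doubles into the low two floats, zeroing the
    // upper lanes of the result.
    __m128 round_pd_to_ps(__m128d v) {
      return _mm_cvtpd_ps(v);
    }
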
def : Pat<(v4f32 (fround (v4f64 VR256:$src))), (VCVTPD2PSYrr VR256:$src)>; def : Pat<(v4f32 (fround (loadv4f64 addr:$src))), @@ -2105,12 +2138,17 @@ let Predicates = [HasAVX] in { (VCVTPS2PDrr VR128:$src)>; def : Pat<(v4f64 (fextend (v4f32 VR128:$src))), (VCVTPS2PDYrr VR128:$src)>; - def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))), + def : Pat<(v4f64 (extloadv4f32 addr:$src)), (VCVTPS2PDYrm addr:$src)>; } -let Predicates = [HasSSE2] in { - // Match fextend for 128 conversions +let Predicates = [UseSSE2] in { + // Match fround and fextend for 128 conversions + def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))), + (CVTPD2PSrr VR128:$src)>; + def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))), + (CVTPD2PSrm addr:$src)>; + def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))), (CVTPS2PDrr VR128:$src)>; } @@ -2121,7 +2159,7 @@ let Predicates = [HasSSE2] in { // sse12_cmp_scalar - sse 1 & 2 compare scalar instructions multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, - Operand CC, SDNode OpNode, ValueType VT, + Operand CC, SDNode OpNode, ValueType VT, PatFrag ld_frag, string asm, string asm_alt, OpndItins itins> { def rr : SIi8<0xC2, MRMSrcReg, @@ -2267,7 +2305,7 @@ let Defs = [EFLAGS] in { // sse12_cmp_packed - sse 1 & 2 compare packed instructions multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, - Operand CC, Intrinsic Int, string asm, + Operand CC, Intrinsic Int, string asm, string asm_alt, Domain d> { def rri : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, @@ -2300,11 +2338,11 @@ defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd, defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256, "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}", "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", - SSEPackedSingle>, TB, VEX_4V; + SSEPackedSingle>, TB, VEX_4V, VEX_L; defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256, "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}", "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", - SSEPackedDouble>, TB, OpSize, VEX_4V; + SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L; let Constraints = "$src1 = $dst" in { defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps, "cmp${cc}ps\t{$src2, $dst|$dst, $src2}", @@ -2336,14 +2374,14 @@ def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)), (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>; } -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), @@ -2374,13 +2412,13 @@ defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, memopv4f32, SSEPackedSingle>, TB, VEX_4V; defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32, "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - memopv8f32, SSEPackedSingle>, TB, VEX_4V; + memopv8f32, SSEPackedSingle>, TB, VEX_4V, VEX_L; defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64, "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}", 
memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V; defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64, "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}", - memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V; + memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L; let Constraints = "$src1 = $dst" in { defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32, @@ -2420,7 +2458,7 @@ let Predicates = [HasAVX] in { (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>; } -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { def : Pat<(v4i32 (X86Shufp VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; @@ -2428,7 +2466,7 @@ let Predicates = [HasSSE1] in { (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { // Generic SHUFPD patterns def : Pat<(v2i64 (X86Shufp VR128:$src1, (memopv2i64 addr:$src2), (i8 imm:$imm))), @@ -2474,16 +2512,16 @@ defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64, defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, memopv8f32, VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, TB, VEX_4V; + SSEPackedSingle>, TB, VEX_4V, VEX_L; defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, memopv4f64, VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, TB, OpSize, VEX_4V; + SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L; defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, memopv8f32, VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, TB, VEX_4V; + SSEPackedSingle>, TB, VEX_4V, VEX_L; defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, memopv4f64, VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, TB, OpSize, VEX_4V; + SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L; let Constraints = "$src1 = $dst" in { defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, @@ -2500,7 +2538,27 @@ let Constraints = "$src1 = $dst" in { SSEPackedDouble>, TB, OpSize; } // Constraints = "$src1 = $dst" -let Predicates = [HasAVX], AddedComplexity = 1 in { +let Predicates = [HasAVX1Only] in { + def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), + (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)), + (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), + (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)), + (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; + + def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)), + (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)), + (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; +} + +let Predicates = [HasAVX] in { // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the // problem is during lowering, where it's not possible to recognize the load // fold cause it has two uses through a bitcast. 
One use disappears at isel @@ -2509,7 +2567,7 @@ let Predicates = [HasAVX], AddedComplexity = 1 in { (VUNPCKLPDrr VR128:$src, VR128:$src)>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the // problem is during lowering, where it's not possible to recognize the load // fold cause it has two uses through a bitcast. One use disappears at isel @@ -2540,10 +2598,11 @@ let Predicates = [HasAVX] in { "movmskpd", SSEPackedDouble>, TB, OpSize, VEX; defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256, - "movmskps", SSEPackedSingle>, TB, VEX; + "movmskps", SSEPackedSingle>, TB, + VEX, VEX_L; defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256, "movmskpd", SSEPackedDouble>, TB, - OpSize, VEX; + OpSize, VEX, VEX_L; def : Pat<(i32 (X86fgetsign FR32:$src)), (VMOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>; @@ -2564,11 +2623,11 @@ let Predicates = [HasAVX] in { OpSize, VEX; def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, - SSEPackedSingle>, TB, VEX; + SSEPackedSingle>, TB, VEX, VEX_L; def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, SSEPackedDouble>, TB, - OpSize, VEX; + OpSize, VEX, VEX_L; } defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps", @@ -2578,16 +2637,16 @@ defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd", def : Pat<(i32 (X86fgetsign FR32:$src)), (MOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>, - Requires<[HasSSE1]>; + Requires<[UseSSE1]>; def : Pat<(i64 (X86fgetsign FR32:$src)), (MOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>, - Requires<[HasSSE1]>; + Requires<[UseSSE1]>; def : Pat<(i32 (X86fgetsign FR64:$src)), (MOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>, - Requires<[HasSSE2]>; + Requires<[UseSSE2]>; def : Pat<(i64 (X86fgetsign FR64:$src)), (MOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>, - Requires<[HasSSE2]>; + Requires<[UseSSE2]>; //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Logical Instructions @@ -2646,13 +2705,13 @@ defm PANDN : PDI_binop_rm<0xDF, "pandn", X86andnp, v2i64, VR128, memopv2i64, let Predicates = [HasAVX2] in { defm VPANDY : PDI_binop_rm<0xDB, "vpand", and, v4i64, VR256, memopv4i64, - i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V; + i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64, - i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V; + i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64, - i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V; + i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64, - i256mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V; + i256mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V, VEX_L; } //===----------------------------------------------------------------------===// @@ -2683,14 +2742,12 @@ multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr, } // Alias bitwise logical operations using SSE logical ops on packed FP values. 
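These Fs* aliases are how scalar float bit-twiddling (fabs, fneg, copysign) ends up as packed andps/orps/xorps on an XMM register; the defs below also lose their explicit mayLoad = 0 wrapper. The idiom the aliases implement, as a C sketch (illustrative, assuming <emmintrin.h>):

#include <emmintrin.h>  /* SSE2 for the integer<->float cast */

/* fabs as a bitwise AND that clears the sign bit; the compiler
 * selects andps for this via the FsAND alias. */
static float fabs_via_andps(float x)
{
    const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
    return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x), mask));
}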
-let mayLoad = 0 in { - defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand, - SSE_BIT_ITINS_P>; - defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for, - SSE_BIT_ITINS_P>; - defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor, - SSE_BIT_ITINS_P>; -} +defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand, + SSE_BIT_ITINS_P>; +defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for, + SSE_BIT_ITINS_P>; +defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor, + SSE_BIT_ITINS_P>; let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef, @@ -2740,7 +2797,7 @@ multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "ps"), f256mem, [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))], [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), - (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V; + (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L; defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble, !strconcat(OpcodeStr, "pd"), f256mem, @@ -2748,7 +2805,7 @@ multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr, (bc_v4i64 (v4f64 VR256:$src2))))], [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), (memopv4i64 addr:$src2)))], 0>, - TB, OpSize, VEX_4V; + TB, OpSize, VEX_4V, VEX_L; } // AVX 256-bit packed logical ops forms @@ -2794,27 +2851,23 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, SizeItins itins, bit Is2Addr = 1> { - let mayLoad = 0 in { defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128, v4f32, f128mem, memopv4f32, SSEPackedSingle, itins.s, Is2Addr>, TB; defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128, v2f64, f128mem, memopv2f64, SSEPackedDouble, itins.d, Is2Addr>, TB, OpSize; - } } multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode, SizeItins itins> { - let mayLoad = 0 in { - defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256, + defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256, v8f32, f256mem, memopv8f32, SSEPackedSingle, itins.s, 0>, - TB; - defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256, + TB, VEX_L; + defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256, v4f64, f256mem, memopv4f64, SSEPackedDouble, itins.d, 0>, - TB, OpSize; - } + TB, OpSize, VEX_L; } multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, @@ -2846,11 +2899,11 @@ multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr, SizeItins itins> { defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256, !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32, - SSEPackedSingle, itins.s, 0>, TB; + SSEPackedSingle, itins.s, 0>, TB, VEX_L; defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256, !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64, - SSEPackedDouble, itins.d, 0>, TB, OpSize; + SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_L; } // Binary Arithmetic instructions @@ -2872,7 +2925,8 @@ let isCommutable = 0 in { basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, VEX_4V; + basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, + 
VEX_4V; defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>, basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>, VEX_4V, VEX_LIG; @@ -2923,6 +2977,23 @@ let Constraints = "$src1 = $dst" in { } } +let isCodeGenOnly = 1 in { + defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>, + VEX_4V, VEX_LIG; + defm VMAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P, 0>, + basic_sse12_fp_binop_p_y<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>, VEX_4V; + defm VMINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S, 0>, + VEX_4V, VEX_LIG; + defm VMINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P, 0>, + basic_sse12_fp_binop_p_y<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>, VEX_4V; + let Constraints = "$src1 = $dst" in { + defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>, + basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>; + defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>, + basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>; + } +} + /// Unop Arithmetic /// In addition, we also have a special variant of the scalar form here to /// represent the associated intrinsic operation. This form is unlike the @@ -2960,7 +3031,7 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, - Requires<[HasSSE1, OptForSize]>; + Requires<[UseSSE1, OptForSize]>; def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>; @@ -2974,7 +3045,7 @@ multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - let mayLoad = 1 in + let mayLoad = 1 in { def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; @@ -2982,6 +3053,7 @@ multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { (ins VR128:$src1, ssmem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + } } /// sse1_fp_unop_p - SSE1 unops in packed form. @@ -3001,11 +3073,11 @@ multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode, def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))], - itins.rr>; + itins.rr>, VEX_L; def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))], - itins.rm>; + itins.rm>, VEX_L; } /// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms. 
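A note on the isCodeGenOnly MAXC/MINC block added above: X86fmaxc/X86fminc are the commutable variants used when the selector can prove the NaN and signed-zero distinction cannot matter (e.g. under relaxed FP semantics), because the underlying maxss/maxps instructions are not commutative. A small C sketch of that asymmetry (illustrative, assuming <math.h> and <xmmintrin.h>):

#include <math.h>
#include <xmmintrin.h>

/* maxss returns its second operand whenever either input is NaN,
 * so operand order is observable: maxss(1.0f, NAN) is NaN,
 * while maxss(NAN, 1.0f) is 1.0f. */
static float maxss(float a, float b)
{
    return _mm_cvtss_f32(_mm_max_ss(_mm_set_ss(a), _mm_set_ss(b)));
}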
@@ -3027,11 +3099,11 @@ multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr, def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (V4F32Int VR256:$src))], - itins.rr>; + itins.rr>, VEX_L; def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))], - itins.rm>; + itins.rm>, VEX_L; } /// sse2_fp_unop_s - SSE2 unops in scalar form. @@ -3044,7 +3116,7 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD, - Requires<[HasSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>; def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>; @@ -3054,20 +3126,20 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, } /// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form. +let hasSideEffects = 0 in multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { - let neverHasSideEffects = 1 in { def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - let mayLoad = 1 in + let mayLoad = 1 in { def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - } def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + } } /// sse2_fp_unop_p - SSE2 unops in vector forms. @@ -3087,11 +3159,11 @@ multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode, def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))], - itins.rr>; + itins.rr>, VEX_L; def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))], - itins.rm>; + itins.rm>, VEX_L; } /// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms. 
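The +VEX_L edits that recur throughout these hunks set the vector-length bit in the VEX prefix, which is what distinguishes the 256-bit (YMM) encodings from their 128-bit twins; the Y-suffixed defs here previously omitted it. For orientation, the two-byte VEX prefix layout from the Intel SDM, as a hedged C sketch (field packing only, not an encoder):

#include <stdint.h>

/* Two-byte VEX prefix: leading 0xC5, then one payload byte with
 * bit 7 = ~R, bits 6..3 = ~vvvv, bit 2 = L (0 = 128-bit, 1 = 256-bit),
 * bits 1..0 = pp. The VEX_L flag in the .td corresponds to L = 1. */
static uint8_t vex2_payload(unsigned r, unsigned vvvv, unsigned l, unsigned pp)
{
    return (uint8_t)(((~r & 1u) << 7) | ((~vvvv & 0xFu) << 3) |
                     ((l & 1u) << 2) | (pp & 3u));
}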
@@ -3113,11 +3185,11 @@ multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr, def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (V2F64Int VR256:$src))], - itins.rr>; + itins.rr>, VEX_L; def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))], - itins.rm>; + itins.rm>, VEX_L; } let Predicates = [HasAVX] in { @@ -3158,7 +3230,6 @@ let Predicates = [HasAVX] in { SSE_RCPP>, VEX; } -let AddedComplexity = 1 in { def : Pat<(f32 (fsqrt FR32:$src)), (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; def : Pat<(f32 (fsqrt (load addr:$src))), @@ -3181,9 +3252,8 @@ def : Pat<(f32 (X86frcp FR32:$src)), def : Pat<(f32 (X86frcp (load addr:$src))), (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX, OptForSize]>; -} -let Predicates = [HasAVX], AddedComplexity = 1 in { +let Predicates = [HasAVX] in { def : Pat<(int_x86_sse_sqrt_ss VR128:$src), (COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS VR128:$src, FR32)), @@ -3223,17 +3293,52 @@ defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss, sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>, sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd, SSE_SQRTS>; +/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. +multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode, + Intrinsic F32Int, OpndItins itins> { + def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), + !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), + [(set FR32:$dst, (OpNode FR32:$src))]>; + // For scalar unary operations, fold a load into the operation + // only in OptForSize mode. It eliminates an instruction, but it also + // eliminates a whole-register clobber (the load), so it introduces a + // partial register update condition. + def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), + !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), + [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, + Requires<[UseSSE1, OptForSize]>; + let Constraints = "$src1 = $dst" in { + def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), + [], itins.rr>; + def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, ssmem:$src2), + !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), + [], itins.rm>; + } +} + // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. 
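The refinement the comment refers to is typically a single Newton-Raphson step layered on top of rsqrtss/rcpss, which raises the hardware's roughly 12-bit estimate to near full single precision. A minimal C sketch of the rsqrt case (illustrative only, assuming <xmmintrin.h>):

#include <xmmintrin.h>  /* SSE1 intrinsics */

/* rsqrtss yields ~12 bits of precision; one Newton-Raphson step
 * x1 = x0 * (1.5 - 0.5 * a * x0 * x0) refines it to ~23 bits. */
static float rsqrt_refined(float a)
{
    __m128 va = _mm_set_ss(a);
    __m128 x0 = _mm_rsqrt_ss(va);
    __m128 t  = _mm_sub_ss(_mm_set_ss(1.5f),
                           _mm_mul_ss(_mm_mul_ss(_mm_set_ss(0.5f), va),
                                      _mm_mul_ss(x0, x0)));
    return _mm_cvtss_f32(_mm_mul_ss(x0, t));
}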
-defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss, - SSE_SQRTS>, +defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss, + SSE_SQRTS>, sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>, sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps, SSE_SQRTS>; -defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss, - SSE_RCPS>, +let Predicates = [UseSSE1] in { + def : Pat<(int_x86_sse_rsqrt_ss VR128:$src), + (RSQRTSSr_Int VR128:$src, VR128:$src)>; +} + +defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss, + SSE_RCPS>, sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPS>, sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPS>; +let Predicates = [UseSSE1] in { + def : Pat<(int_x86_sse_rcp_ss VR128:$src), + (RCPSSr_Int VR128:$src, VR128:$src)>; +} // There is no f64 version of the reciprocal approximation instructions. @@ -3271,20 +3376,20 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions "movntps\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v8f32 VR256:$src), addr:$dst)], - IIC_SSE_MOVNT>, VEX; + IIC_SSE_MOVNT>, VEX, VEX_L; def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movntpd\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4f64 VR256:$src), addr:$dst)], - IIC_SSE_MOVNT>, VEX; + IIC_SSE_MOVNT>, VEX, VEX_L; let ExeDomain = SSEPackedInt in def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movntdq\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4i64 VR256:$src), addr:$dst)], - IIC_SSE_MOVNT>, VEX; + IIC_SSE_MOVNT>, VEX, VEX_L; } let AddedComplexity = 400 in { // Prefer non-temporal versions @@ -3304,7 +3409,7 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), IIC_SSE_MOVNT>; def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>; // There is no AVX form for instructions below this point def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), @@ -3393,14 +3498,14 @@ def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), VEX; def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, - VEX; + VEX, VEX_L; } def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>, VEX; def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>, - VEX; + VEX, VEX_L; // For Disassembler let isCodeGenOnly = 1 in { @@ -3410,16 +3515,14 @@ def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), VEX; def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movdqa\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, - VEX; + IIC_SSE_MOVA_P_RR>, VEX, VEX_L; def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>, VEX; def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movdqu\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, - VEX; + IIC_SSE_MOVU_P_RR>, VEX, VEX_L; } let canFoldAsLoad = 1, mayLoad = 1 in { @@ -3428,14 +3531,14 @@ def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), VEX; def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), 
(ins i256mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>, - VEX; + VEX, VEX_L; let Predicates = [HasAVX] in { def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>, XS, VEX; def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>, - XS, VEX; + XS, VEX, VEX_L; } } @@ -3447,14 +3550,14 @@ def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>, - VEX; + VEX, VEX_L; let Predicates = [HasAVX] in { def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>, XS, VEX; def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>, - XS, VEX; + XS, VEX, VEX_L; } } @@ -3464,7 +3567,7 @@ def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", - [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>; + [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; // For Disassembler let isCodeGenOnly = 1 in { @@ -3474,7 +3577,7 @@ def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", - [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>; + [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; } let canFoldAsLoad = 1, mayLoad = 1 in { @@ -3486,7 +3589,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqu\t{$src, $dst|$dst, $src}", [/*(set VR128:$dst, (loadv2i64 addr:$src))*/], IIC_SSE_MOVU_P_RM>, - XS, Requires<[HasSSE2]>; + XS, Requires<[UseSSE2]>; } let mayStore = 1 in { @@ -3498,7 +3601,7 @@ def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [/*(store (v2i64 VR128:$src), addr:$dst)*/], IIC_SSE_MOVU_P_MR>, - XS, Requires<[HasSSE2]>; + XS, Requires<[UseSSE2]>; } // Intrinsic forms of MOVDQU load and store @@ -3512,7 +3615,7 @@ def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)], IIC_SSE_MOVU_P_MR>, - XS, Requires<[HasSSE2]>; + XS, Requires<[UseSSE2]>; } // ExeDomain = SSEPackedInt @@ -3690,82 +3793,82 @@ defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, let Predicates = [HasAVX2] in { defm VPADDBY : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64, - i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPADDWY : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64, - i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPADDDY : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64, - i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPADDQY : PDI_binop_rm<0xD4, "vpaddq", add, v4i64, VR256, memopv4i64, - i256mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V; + i256mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64, - i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V; + i256mem, 
SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPSUBBY : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64, - i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; + i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; defm VPSUBWY : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64, - i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; + i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; defm VPSUBDY : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64, - i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; + i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; defm VPSUBQY : PDI_binop_rm<0xFB, "vpsubq", sub, v4i64, VR256, memopv4i64, - i256mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V; + i256mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V, VEX_L; defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32, VR256, memopv4i64, i256mem, - SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V; + SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; // Intrinsic forms defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; defm VPSUBSWY : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_avx2_psubs_w, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; defm VPSUBUSBY : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_avx2_psubus_b, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; defm VPSUBUSWY : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_avx2_psubus_w, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; defm VPADDSBY : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_avx2_padds_b, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPADDSWY : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_avx2_padds_w, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPADDUSBY : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_avx2_paddus_b, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPADDUSWY : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_avx2_paddus_w, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPMULHUWY : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_avx2_pmulhu_w, VR256, memopv4i64, i256mem, - SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V; + SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPMULHWY : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_avx2_pmulh_w, VR256, memopv4i64, i256mem, - SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V; + SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPMADDWDY : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_avx2_pmadd_wd, VR256, memopv4i64, i256mem, - SSE_PMADD, 1, 0>, VEX_4V; + SSE_PMADD, 1, 0>, VEX_4V, VEX_L; defm VPAVGBY : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPAVGWY : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPMINUBY : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPMINSWY : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w, VR256, 
memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPMAXUBY : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPMAXSWY : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; } let Constraints = "$src1 = $dst" in { @@ -3901,30 +4004,30 @@ let ExeDomain = SSEPackedInt in { let Predicates = [HasAVX2] in { defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, VR256, v16i16, v8i16, bc_v8i16, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli, VR256, v8i32, v4i32, bc_v4i32, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli, VR256, v4i64, v2i64, bc_v2i64, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, VR256, v16i16, v8i16, bc_v8i16, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli, VR256, v8i32, v4i32, bc_v4i32, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli, VR256, v4i64, v2i64, bc_v2i64, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, VR256, v16i16, v8i16, bc_v8i16, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR256, v8i32, v4i32, bc_v4i32, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; let ExeDomain = SSEPackedInt in { // 256-bit logical shifts. @@ -3933,13 +4036,13 @@ let ExeDomain = SSEPackedInt in { "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR256:$dst, (int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2))]>, - VEX_4V; + VEX_4V, VEX_L; def VPSRLDQYri : PDIi8<0x73, MRM3r, (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2), "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR256:$dst, (int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2))]>, - VEX_4V; + VEX_4V, VEX_L; // PSRADQYri doesn't exist in SSE[1-3]. 
} } // Predicates = [HasAVX2] @@ -4010,7 +4113,7 @@ let Predicates = [HasAVX2] in { (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>; def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), @@ -4053,22 +4156,22 @@ let Predicates = [HasAVX] in { let Predicates = [HasAVX2] in { defm VPCMPEQBY : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v32i8, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPCMPEQWY : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v16i16, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPCMPEQDY : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v8i32, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; defm VPCMPGTBY : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v32i8, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; defm VPCMPGTWY : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v16i16, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; defm VPCMPGTDY : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v8i32, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; } let Constraints = "$src1 = $dst" in { @@ -4111,13 +4214,13 @@ defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128, let Predicates = [HasAVX2] in { defm VPACKSSWBY : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_avx2_packsswb, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; defm VPACKSSDWY : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_avx2_packssdw, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; defm VPACKUSWBY : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_avx2_packuswb, VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; } let Constraints = "$src1 = $dst" in { @@ -4187,12 +4290,15 @@ let Predicates = [HasAVX] in { } let Predicates = [HasAVX2] in { - defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>, TB, OpSize, VEX; - defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>, XS, VEX; - defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>, XD, VEX; + defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>, + TB, OpSize, VEX,VEX_L; + defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>, + XS, VEX, VEX_L; + defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>, + XD, VEX, VEX_L; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { let AddedComplexity = 5 in defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, X86PShufd>, TB, OpSize; @@ -4268,22 +4374,22 @@ let Predicates = [HasAVX] in { let Predicates = [HasAVX2] in { defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl, - bc_v32i8>, VEX_4V; + bc_v32i8>, VEX_4V, VEX_L; defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl, - bc_v16i16>, VEX_4V; + bc_v16i16>, VEX_4V, VEX_L; defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl, - bc_v8i32>, VEX_4V; + bc_v8i32>, VEX_4V, VEX_L; defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, - bc_v4i64>, 
VEX_4V; + bc_v4i64>, VEX_4V, VEX_L; defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh, - bc_v32i8>, VEX_4V; + bc_v32i8>, VEX_4V, VEX_L; defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh, - bc_v16i16>, VEX_4V; + bc_v16i16>, VEX_4V, VEX_L; defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh, - bc_v8i32>, VEX_4V; + bc_v8i32>, VEX_4V, VEX_L; defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, - bc_v4i64>, VEX_4V; + bc_v4i64>, VEX_4V, VEX_L; } let Constraints = "$src1 = $dst" in { @@ -4307,28 +4413,6 @@ let Constraints = "$src1 = $dst" in { } } // ExeDomain = SSEPackedInt -// Patterns for using AVX1 instructions with integer vectors -// Here to give AVX2 priority -let Predicates = [HasAVX] in { - def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), - (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)), - (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), - (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)), - (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; - - def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))), - (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)), - (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))), - (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)), - (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; -} - //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Extract and Insert //===---------------------------------------------------------------------===// @@ -4377,7 +4461,7 @@ let Predicates = [HasAVX] in { } let Constraints = "$src1 = $dst" in - defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[HasSSE2]>; + defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[UseSSE2]>; } // ExeDomain = SSEPackedInt @@ -4397,9 +4481,9 @@ def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), let Predicates = [HasAVX2] in { def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src), "pmovmskb\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, VEX; + [(set GR32:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, VEX, VEX_L; def VPMOVMSKBYr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), - "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX; + "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; } def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), @@ -4538,7 +4622,7 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), // Move Packed Doubleword Int first element to Doubleword Int // def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "vmov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>, @@ -4654,14 +4738,14 @@ let Predicates = [HasAVX] in { } // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext. 
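In other words, a plain vmovd/vmovq from a GPR already zeroes every lane above element 0, so the patterns below can wrap the 128-bit instruction in SUBREG_TO_REG and treat the YMM upper half as implicitly zero (they also switch the insert_subvector index from i32 to iPTR). The scalar idiom in C intrinsics, as a sketch (assuming <emmintrin.h>):

#include <emmintrin.h>

/* movd GR32 -> XMM writes lane 0 and zeroes lanes 1..3, matching
 * the (X86vzmovl (scalar_to_vector ...)) node these patterns select. */
static __m128i zext_scalar_to_vec(int x)
{
    return _mm_cvtsi32_si128(x);
}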
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, - (v4i32 (scalar_to_vector GR32:$src)),(i32 0)))), + (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVZDI2PDIrr GR32:$src), sub_xmm)>; def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, - (v2i64 (scalar_to_vector GR64:$src)),(i32 0)))), + (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))), (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>; } -let Predicates = [HasSSE2], AddedComplexity = 20 in { +let Predicates = [UseSSE2], AddedComplexity = 20 in { def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), (MOVZDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), @@ -4701,7 +4785,7 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), [(set VR128:$dst, (v2i64 (scalar_to_vector (loadi64 addr:$src))))], IIC_SSE_MOVDQ>, XS, - Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix + Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix //===---------------------------------------------------------------------===// // Move Packed Quadword Int to Quadword Int @@ -4744,7 +4828,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], IIC_SSE_MOVDQ>, - XS, Requires<[HasSSE2]>; + XS, Requires<[UseSSE2]>; let Predicates = [HasAVX], AddedComplexity = 20 in { def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), @@ -4755,7 +4839,7 @@ let Predicates = [HasAVX], AddedComplexity = 20 in { (VMOVZQI2PQIrm addr:$src)>; } -let Predicates = [HasSSE2], AddedComplexity = 20 in { +let Predicates = [UseSSE2], AddedComplexity = 20 in { def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), (MOVZQI2PQIrm addr:$src)>; def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), @@ -4785,7 +4869,7 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))], IIC_SSE_MOVQ_RR>, - XS, Requires<[HasSSE2]>; + XS, Requires<[UseSSE2]>; let AddedComplexity = 20 in def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), @@ -4800,7 +4884,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), [(set VR128:$dst, (v2i64 (X86vzmovl (loadv2i64 addr:$src))))], IIC_SSE_MOVDQ>, - XS, Requires<[HasSSE2]>; + XS, Requires<[UseSSE2]>; } let AddedComplexity = 20 in { @@ -4810,7 +4894,7 @@ let AddedComplexity = 20 in { def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), (VMOVZPQILo2PQIrr VR128:$src)>; } - let Predicates = [HasSSE2] in { + let Predicates = [UseSSE2] in { def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), (MOVZPQILo2PQIrm addr:$src)>; def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), @@ -4862,9 +4946,9 @@ let Predicates = [HasAVX] in { defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", v4f32, VR128, memopv4f32, f128mem>, VEX; defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", - v8f32, VR256, memopv8f32, f256mem>, VEX; + v8f32, VR256, memopv8f32, f256mem>, VEX, VEX_L; defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", - v8f32, VR256, memopv8f32, f256mem>, VEX; + v8f32, VR256, memopv8f32, f256mem>, VEX, VEX_L; } defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128, memopv4f32, f128mem>; @@ -4890,7 +4974,7 @@ let Predicates = [HasAVX] in { (VMOVSLDUPYrm addr:$src)>; } -let Predicates = [HasSSE3] in { +let Predicates = [UseSSE3] in { def : 
Pat<(v4i32 (X86Movshdup VR128:$src)), (MOVSHDUPrr VR128:$src)>; def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))), @@ -4932,7 +5016,7 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), let Predicates = [HasAVX] in { defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; - defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX; + defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L; } defm MOVDDUP : sse3_replicate_dfp<"movddup">; @@ -4959,7 +5043,7 @@ let Predicates = [HasAVX] in { (VMOVDDUPYrr VR256:$src)>; } -let Predicates = [HasSSE3] in { +let Predicates = [UseSSE3] in { def : Pat<(X86Movddup (memopv2f64 addr:$src)), (MOVDDUPrm addr:$src)>; def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), @@ -4981,7 +5065,8 @@ let Predicates = [HasAVX] in { [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX; def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), "vlddqu\t{$src, $dst|$dst, $src}", - [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, VEX; + [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, + VEX, VEX_L; } def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "lddqu\t{$src, $dst|$dst, $src}", @@ -5014,16 +5099,16 @@ let Predicates = [HasAVX] in { defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128, f128mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V; defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256, - f256mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V; + f256mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V, VEX_L; } let ExeDomain = SSEPackedDouble in { defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128, f128mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V; defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256, - f256mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V; + f256mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V, VEX_L; } } -let Constraints = "$src1 = $dst", Predicates = [HasSSE3] in { +let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in { let ExeDomain = SSEPackedSingle in defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128, f128mem, SSE_ALU_F32P>, TB, XD; @@ -5075,9 +5160,9 @@ let Predicates = [HasAVX] in { defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, X86fhsub, 0>, VEX_4V; defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, - X86fhadd, 0>, VEX_4V; + X86fhadd, 0>, VEX_4V, VEX_L; defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, - X86fhsub, 0>, VEX_4V; + X86fhsub, 0>, VEX_4V, VEX_L; } let ExeDomain = SSEPackedDouble in { defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem, @@ -5085,9 +5170,9 @@ let Predicates = [HasAVX] in { defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem, X86fhsub, 0>, VEX_4V; defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem, - X86fhadd, 0>, VEX_4V; + X86fhadd, 0>, VEX_4V, VEX_L; defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem, - X86fhsub, 0>, VEX_4V; + X86fhsub, 0>, VEX_4V, VEX_L; } } @@ -5153,11 +5238,11 @@ let Predicates = [HasAVX] in { let Predicates = [HasAVX2] in { defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb", - int_x86_avx2_pabs_b>, VEX; + int_x86_avx2_pabs_b>, VEX, VEX_L; defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw", - int_x86_avx2_pabs_w>, VEX; + int_x86_avx2_pabs_w>, VEX, VEX_L; defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", - int_x86_avx2_pabs_d>, VEX; + int_x86_avx2_pabs_d>, VEX, VEX_L; } defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", @@ -5296,37 +5381,37 @@ let ImmT = 
NoImm, Predicates = [HasAVX2] in { let isCommutable = 0 in { defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256, memopv4i64, i256mem, - SSE_PHADDSUBW, 0>, VEX_4V; + SSE_PHADDSUBW, 0>, VEX_4V, VEX_L; defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256, memopv4i64, i256mem, - SSE_PHADDSUBW, 0>, VEX_4V; + SSE_PHADDSUBW, 0>, VEX_4V, VEX_L; defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256, memopv4i64, i256mem, - SSE_PHADDSUBW, 0>, VEX_4V; + SSE_PHADDSUBW, 0>, VEX_4V, VEX_L; defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256, memopv4i64, i256mem, - SSE_PHADDSUBW, 0>, VEX_4V; + SSE_PHADDSUBW, 0>, VEX_4V, VEX_L; defm VPSIGNBY : SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256, memopv4i64, i256mem, - SSE_PHADDSUBW, 0>, VEX_4V; + SSE_PHADDSUBW, 0>, VEX_4V, VEX_L; defm VPSIGNWY : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256, memopv4i64, i256mem, - SSE_PHADDSUBW, 0>, VEX_4V; + SSE_PHADDSUBW, 0>, VEX_4V, VEX_L; defm VPSIGNDY : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256, memopv4i64, i256mem, - SSE_PHADDSUBW, 0>, VEX_4V; + SSE_PHADDSUBW, 0>, VEX_4V, VEX_L; defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256, memopv4i64, i256mem, - SSE_PHADDSUBW, 0>, VEX_4V; + SSE_PHADDSUBW, 0>, VEX_4V, VEX_L; defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", - int_x86_avx2_phadd_sw>, VEX_4V; + int_x86_avx2_phadd_sw>, VEX_4V, VEX_L; defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", - int_x86_avx2_phsub_sw>, VEX_4V; + int_x86_avx2_phsub_sw>, VEX_4V, VEX_L; defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", - int_x86_avx2_pmadd_ub_sw>, VEX_4V; + int_x86_avx2_pmadd_ub_sw>, VEX_4V, VEX_L; } defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", - int_x86_avx2_pmul_hr_sw>, VEX_4V; + int_x86_avx2_pmul_hr_sw>, VEX_4V, VEX_L; } // None of these have i8 immediate fields. 
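The horizontal add/subtract defs above gain VEX_L for their 256-bit forms; note that, like most AVX2 integer ops, vphaddw on YMM operates within each 128-bit lane independently rather than across the full vector. The 128-bit semantics, as a C sketch (assuming SSSE3 and <tmmintrin.h>):

#include <tmmintrin.h>  /* SSSE3 intrinsics */

/* phaddw sums adjacent word pairs: result lanes 0..3 come from a,
 * lanes 4..7 from b. */
static __m128i hadd_words(__m128i a, __m128i b)
{
    return _mm_hadd_epi16(a, b);
}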
@@ -5405,8 +5490,8 @@ multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> { let Predicates = [HasAVX] in defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; let Predicates = [HasAVX2] in - defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V; -let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in + defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V, VEX_L; +let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in defm PALIGN : ssse3_palign<"palignr">; let Predicates = [HasAVX2] in { @@ -5431,7 +5516,7 @@ def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; } -let Predicates = [HasSSSE3] in { +let Predicates = [UseSSSE3] in { def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), @@ -5512,17 +5597,17 @@ defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>, let Predicates = [HasAVX2] in { defm VPMOVSXBW : SS41I_binop_rm_int16_y<0x20, "vpmovsxbw", - int_x86_avx2_pmovsxbw>, VEX; + int_x86_avx2_pmovsxbw>, VEX, VEX_L; defm VPMOVSXWD : SS41I_binop_rm_int16_y<0x23, "vpmovsxwd", - int_x86_avx2_pmovsxwd>, VEX; + int_x86_avx2_pmovsxwd>, VEX, VEX_L; defm VPMOVSXDQ : SS41I_binop_rm_int16_y<0x25, "vpmovsxdq", - int_x86_avx2_pmovsxdq>, VEX; + int_x86_avx2_pmovsxdq>, VEX, VEX_L; defm VPMOVZXBW : SS41I_binop_rm_int16_y<0x30, "vpmovzxbw", - int_x86_avx2_pmovzxbw>, VEX; + int_x86_avx2_pmovzxbw>, VEX, VEX_L; defm VPMOVZXWD : SS41I_binop_rm_int16_y<0x33, "vpmovzxwd", - int_x86_avx2_pmovzxwd>, VEX; + int_x86_avx2_pmovzxwd>, VEX, VEX_L; defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq", - int_x86_avx2_pmovzxdq>, VEX; + int_x86_avx2_pmovzxdq>, VEX, VEX_L; } defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>; @@ -5538,64 +5623,88 @@ let Predicates = [HasAVX] in { (VPMOVSXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), (VPMOVSXBWrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))), + (VPMOVSXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), (VPMOVSXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), (VPMOVSXWDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))), + (VPMOVSXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), (VPMOVSXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), (VPMOVSXDQrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))), + (VPMOVSXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), (VPMOVZXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), (VPMOVZXBWrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))), + (VPMOVZXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), (VPMOVZXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), (VPMOVZXWDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))), + (VPMOVZXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), (VPMOVZXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), (VPMOVZXDQrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))), + (VPMOVZXDQrm addr:$src)>; } -let Predicates = 
[HasSSE41] in { +let Predicates = [UseSSE41] in { // Common patterns involving scalar load. def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), (PMOVSXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), (PMOVSXBWrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))), + (PMOVSXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), (PMOVSXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), (PMOVSXWDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))), + (PMOVSXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), (PMOVSXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), (PMOVSXDQrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))), + (PMOVSXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), (PMOVZXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), (PMOVZXBWrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))), + (PMOVZXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), (PMOVZXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), (PMOVZXWDrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))), + (PMOVZXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), (PMOVZXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), (PMOVZXDQrm addr:$src)>; + def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))), + (PMOVZXDQrm addr:$src)>; } let Predicates = [HasAVX2] in { @@ -5615,7 +5724,7 @@ let Predicates = [HasAVX] in { def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>; } -let Predicates = [HasSSE41] in { +let Predicates = [UseSSE41] in { def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>; def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>; } @@ -5659,13 +5768,13 @@ defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>, let Predicates = [HasAVX2] in { defm VPMOVSXBD : SS41I_binop_rm_int8_y<0x21, "vpmovsxbd", - int_x86_avx2_pmovsxbd>, VEX; + int_x86_avx2_pmovsxbd>, VEX, VEX_L; defm VPMOVSXWQ : SS41I_binop_rm_int8_y<0x24, "vpmovsxwq", - int_x86_avx2_pmovsxwq>, VEX; + int_x86_avx2_pmovsxwq>, VEX, VEX_L; defm VPMOVZXBD : SS41I_binop_rm_int8_y<0x31, "vpmovzxbd", - int_x86_avx2_pmovzxbd>, VEX; + int_x86_avx2_pmovzxbd>, VEX, VEX_L; defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq", - int_x86_avx2_pmovzxwq>, VEX; + int_x86_avx2_pmovzxwq>, VEX, VEX_L; } defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>; @@ -5686,7 +5795,7 @@ let Predicates = [HasAVX] in { (VPMOVZXWQrm addr:$src)>; } -let Predicates = [HasSSE41] in { +let Predicates = [UseSSE41] in { // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), (PMOVSXBDrm addr:$src)>; @@ -5734,9 +5843,9 @@ defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>, } let Predicates = [HasAVX2] in { defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq", - int_x86_avx2_pmovsxbq>, VEX; + int_x86_avx2_pmovsxbq>, VEX, VEX_L; defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq", - int_x86_avx2_pmovzxbq>, VEX; + int_x86_avx2_pmovzxbq>, VEX, VEX_L; } defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", 
int_x86_sse41_pmovsxbq>; defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; @@ -5754,7 +5863,7 @@ let Predicates = [HasAVX] in { (VPMOVZXBQrm addr:$src)>; } -let Predicates = [HasSSE41] in { +let Predicates = [UseSSE41] in { // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbq (bitconvert (v4i32 (X86vzmovl @@ -5767,6 +5876,100 @@ let Predicates = [HasSSE41] in { (PMOVZXBQrm addr:$src)>; } +let Predicates = [HasAVX2] in { + def : Pat<(v16i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWYrr VR128:$src)>; + def : Pat<(v8i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDYrr VR128:$src)>; + def : Pat<(v4i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQYrr VR128:$src)>; + + def : Pat<(v8i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDYrr VR128:$src)>; + def : Pat<(v4i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQYrr VR128:$src)>; + + def : Pat<(v4i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQYrr VR128:$src)>; + + def : Pat<(v16i16 (X86vzext (v32i8 VR256:$src))), + (VPMOVZXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v8i32 (X86vzext (v32i8 VR256:$src))), + (VPMOVZXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v4i64 (X86vzext (v32i8 VR256:$src))), + (VPMOVZXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + + def : Pat<(v8i32 (X86vzext (v16i16 VR256:$src))), + (VPMOVZXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v4i64 (X86vzext (v16i16 VR256:$src))), + (VPMOVZXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + + def : Pat<(v4i64 (X86vzext (v8i32 VR256:$src))), + (VPMOVZXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; +} + +let Predicates = [HasAVX] in { + def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBWrr VR128:$src)>; + def : Pat<(v4i32 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBDrr VR128:$src)>; + def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (VPMOVZXBQrr VR128:$src)>; + + def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWDrr VR128:$src)>; + def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQrr VR128:$src)>; + + def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQrr VR128:$src)>; + + def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), + (VPMOVZXBWrm addr:$src)>; + def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), + (VPMOVZXBWrm addr:$src)>; + def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), + (VPMOVZXBDrm addr:$src)>; + def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))), + (VPMOVZXBQrm addr:$src)>; + + def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), + (VPMOVZXWDrm addr:$src)>; + def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), + (VPMOVZXWDrm addr:$src)>; + def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), + (VPMOVZXWQrm addr:$src)>; + + def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), + (VPMOVZXDQrm addr:$src)>; + def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), + (VPMOVZXDQrm addr:$src)>; + def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))), + (VPMOVZXDQrm addr:$src)>; +} + +let Predicates = [UseSSE41] in { + def : Pat<(v8i16 (X86vzext (v16i8 VR128:$src))), (PMOVZXBWrr VR128:$src)>; + def : Pat<(v4i32 (X86vzext (v16i8 
VR128:$src))), (PMOVZXBDrr VR128:$src)>; + def : Pat<(v2i64 (X86vzext (v16i8 VR128:$src))), (PMOVZXBQrr VR128:$src)>; + + def : Pat<(v4i32 (X86vzext (v8i16 VR128:$src))), (PMOVZXWDrr VR128:$src)>; + def : Pat<(v2i64 (X86vzext (v8i16 VR128:$src))), (PMOVZXWQrr VR128:$src)>; + + def : Pat<(v2i64 (X86vzext (v4i32 VR128:$src))), (PMOVZXDQrr VR128:$src)>; + + def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), + (PMOVZXBWrm addr:$src)>; + def : Pat<(v8i16 (X86vzext (v16i8 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), + (PMOVZXBWrm addr:$src)>; + def : Pat<(v4i32 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), + (PMOVZXBDrm addr:$src)>; + def : Pat<(v2i64 (X86vzext (v16i8 (bitconvert (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))), + (PMOVZXBQrm addr:$src)>; + + def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), + (PMOVZXWDrm addr:$src)>; + def : Pat<(v4i32 (X86vzext (v8i16 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), + (PMOVZXWDrm addr:$src)>; + def : Pat<(v2i64 (X86vzext (v8i16 (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), + (PMOVZXWQrm addr:$src)>; + + def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), + (PMOVZXDQrm addr:$src)>; + def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src))))))), + (PMOVZXDQrm addr:$src)>; + def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))), + (PMOVZXDQrm addr:$src)>; +} + //===----------------------------------------------------------------------===// // SSE4.1 - Extract Instructions //===----------------------------------------------------------------------===// @@ -5900,7 +6103,7 @@ def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))), addr:$dst), (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, - Requires<[HasSSE41]>; + Requires<[UseSSE41]>; //===----------------------------------------------------------------------===// // SSE4.1 - Insert Instructions @@ -6147,7 +6350,7 @@ let Predicates = [HasAVX] in { defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256, memopv8f32, memopv4f64, int_x86_avx_round_ps_256, - int_x86_avx_round_pd_256>, VEX; + int_x86_avx_round_pd_256>, VEX, VEX_L; defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround", int_x86_sse41_round_ss, int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG; @@ -6172,6 +6375,15 @@ let Predicates = [HasAVX] in { (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>; def : Pat<(f64 (ftrunc FR64:$src)), (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>; + + def : Pat<(v4f32 (ffloor VR128:$src)), + (VROUNDPSr VR128:$src, (i32 0x1))>; + def : Pat<(v2f64 (ffloor VR128:$src)), + (VROUNDPDr VR128:$src, (i32 0x1))>; + def : Pat<(v8f32 (ffloor VR256:$src)), + (VROUNDYPSr VR256:$src, (i32 0x1))>; + def : Pat<(v4f64 (ffloor VR256:$src)), + (VROUNDYPDr VR256:$src, (i32 0x1))>; } defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128, @@ -6181,26 +6393,33 @@ let Constraints = "$src1 = $dst" in defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", int_x86_sse41_round_ss, int_x86_sse41_round_sd>; -def : Pat<(ffloor FR32:$src), - (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>; -def : Pat<(f64 (ffloor FR64:$src)), - (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>; -def : Pat<(f32 (fnearbyint FR32:$src)), - (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, 
(i32 0xC))>; -def : Pat<(f64 (fnearbyint FR64:$src)), - (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>; -def : Pat<(f32 (fceil FR32:$src)), - (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>; -def : Pat<(f64 (fceil FR64:$src)), - (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>; -def : Pat<(f32 (frint FR32:$src)), - (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>; -def : Pat<(f64 (frint FR64:$src)), - (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>; -def : Pat<(f32 (ftrunc FR32:$src)), - (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>; -def : Pat<(f64 (ftrunc FR64:$src)), - (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>; +let Predicates = [UseSSE41] in { + def : Pat<(ffloor FR32:$src), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>; + def : Pat<(f64 (ffloor FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>; + def : Pat<(f32 (fnearbyint FR32:$src)), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>; + def : Pat<(f64 (fnearbyint FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>; + def : Pat<(f32 (fceil FR32:$src)), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>; + def : Pat<(f64 (fceil FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>; + def : Pat<(f32 (frint FR32:$src)), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>; + def : Pat<(f64 (frint FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>; + def : Pat<(f32 (ftrunc FR32:$src)), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>; + def : Pat<(f64 (ftrunc FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>; + + def : Pat<(v4f32 (ffloor VR128:$src)), + (ROUNDPSr VR128:$src, (i32 0x1))>; + def : Pat<(v2f64 (ffloor VR128:$src)), + (ROUNDPDr VR128:$src, (i32 0x1))>; +} //===----------------------------------------------------------------------===// // SSE4.1 - Packed Bit Test @@ -6221,11 +6440,11 @@ def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>, - OpSize, VEX; + OpSize, VEX, VEX_L; def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>, - OpSize, VEX; + OpSize, VEX, VEX_L; } let Defs = [EFLAGS] in { @@ -6254,11 +6473,13 @@ multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC, let Defs = [EFLAGS], Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in { defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>; -defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>; +defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>, + VEX_L; } let ExeDomain = SSEPackedDouble in { defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>; -defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>; +defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>, + VEX_L; } } @@ -6338,7 +6559,7 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr, (bitconvert (memopv2i64 addr:$src2))))]>, OpSize; } -/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator +/// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr, 
Intrinsic IntId256> { let isCommutable = 1 in @@ -6381,25 +6602,25 @@ let Predicates = [HasAVX] in { let Predicates = [HasAVX2] in { let isCommutable = 0 in defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw", - int_x86_avx2_packusdw>, VEX_4V; + int_x86_avx2_packusdw>, VEX_4V, VEX_L; defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb", - int_x86_avx2_pmins_b>, VEX_4V; + int_x86_avx2_pmins_b>, VEX_4V, VEX_L; defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd", - int_x86_avx2_pmins_d>, VEX_4V; + int_x86_avx2_pmins_d>, VEX_4V, VEX_L; defm VPMINUD : SS41I_binop_rm_int_y<0x3B, "vpminud", - int_x86_avx2_pminu_d>, VEX_4V; + int_x86_avx2_pminu_d>, VEX_4V, VEX_L; defm VPMINUW : SS41I_binop_rm_int_y<0x3A, "vpminuw", - int_x86_avx2_pminu_w>, VEX_4V; + int_x86_avx2_pminu_w>, VEX_4V, VEX_L; defm VPMAXSB : SS41I_binop_rm_int_y<0x3C, "vpmaxsb", - int_x86_avx2_pmaxs_b>, VEX_4V; + int_x86_avx2_pmaxs_b>, VEX_4V, VEX_L; defm VPMAXSD : SS41I_binop_rm_int_y<0x3D, "vpmaxsd", - int_x86_avx2_pmaxs_d>, VEX_4V; + int_x86_avx2_pmaxs_d>, VEX_4V, VEX_L; defm VPMAXUD : SS41I_binop_rm_int_y<0x3F, "vpmaxud", - int_x86_avx2_pmaxu_d>, VEX_4V; + int_x86_avx2_pmaxu_d>, VEX_4V, VEX_L; defm VPMAXUW : SS41I_binop_rm_int_y<0x3E, "vpmaxuw", - int_x86_avx2_pmaxu_w>, VEX_4V; + int_x86_avx2_pmaxu_w>, VEX_4V, VEX_L; defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq", - int_x86_avx2_pmul_dq>, VEX_4V; + int_x86_avx2_pmul_dq>, VEX_4V, VEX_L; } let Constraints = "$src1 = $dst" in { @@ -6445,9 +6666,9 @@ let Predicates = [HasAVX] in { } let Predicates = [HasAVX2] in { defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256, - memopv4i64, i256mem, 0>, VEX_4V; + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256, - memopv4i64, i256mem, 0>, VEX_4V; + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; } let Constraints = "$src1 = $dst" in { @@ -6490,13 +6711,15 @@ let Predicates = [HasAVX] in { defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, VR128, memopv4f32, f128mem, 0>, VEX_4V; defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps", - int_x86_avx_blend_ps_256, VR256, memopv8f32, f256mem, 0>, VEX_4V; + int_x86_avx_blend_ps_256, VR256, memopv8f32, + f256mem, 0>, VEX_4V, VEX_L; } let ExeDomain = SSEPackedDouble in { defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd, VR128, memopv2f64, f128mem, 0>, VEX_4V; defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd", - int_x86_avx_blend_pd_256, VR256, memopv4f64, f256mem, 0>, VEX_4V; + int_x86_avx_blend_pd_256,VR256, memopv4f64, + f256mem, 0>, VEX_4V, VEX_L; } defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw, VR128, memopv2i64, i128mem, 0>, VEX_4V; @@ -6511,15 +6734,15 @@ let Predicates = [HasAVX] in { VR128, memopv2f64, f128mem, 0>, VEX_4V; let ExeDomain = SSEPackedSingle in defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256, - VR256, memopv8f32, i256mem, 0>, VEX_4V; + VR256, memopv8f32, i256mem, 0>, VEX_4V, VEX_L; } let Predicates = [HasAVX2] in { let isCommutable = 0 in { defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw, - VR256, memopv4i64, i256mem, 0>, VEX_4V; + VR256, memopv4i64, i256mem, 0>, VEX_4V, VEX_L; defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw, - VR256, memopv4i64, i256mem, 0>, VEX_4V; + VR256, memopv4i64, i256mem, 0>, VEX_4V, VEX_L; } } @@ -6570,13 +6793,13 @@ let ExeDomain = SSEPackedDouble in { defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, 
f128mem, memopv2f64, int_x86_sse41_blendvpd>; defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem, - memopv4f64, int_x86_avx_blendv_pd_256>; + memopv4f64, int_x86_avx_blendv_pd_256>, VEX_L; } // ExeDomain = SSEPackedDouble let ExeDomain = SSEPackedSingle in { defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem, memopv4f32, int_x86_sse41_blendvps>; defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem, - memopv8f32, int_x86_avx_blendv_ps_256>; + memopv8f32, int_x86_avx_blendv_ps_256>, VEX_L; } // ExeDomain = SSEPackedSingle defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem, memopv2i64, int_x86_sse41_pblendvb>; @@ -6584,7 +6807,7 @@ defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem, let Predicates = [HasAVX2] in { defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem, - memopv4i64, int_x86_avx2_pblendvb>; + memopv4i64, int_x86_avx2_pblendvb>, VEX_L; } let Predicates = [HasAVX] in { @@ -6687,7 +6910,7 @@ def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", (PBLENDVBrm0 VR128:$dst, i128mem:$src2)>; -let Predicates = [HasSSE41] in { +let Predicates = [UseSSE41] in { def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1), (v16i8 VR128:$src2))), (PBLENDVBrr0 VR128:$src2, VR128:$src1)>; @@ -6725,7 +6948,7 @@ let Predicates = [HasAVX2] in def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>, - OpSize, VEX; + OpSize, VEX, VEX_L; def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movntdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, @@ -6761,7 +6984,7 @@ let Predicates = [HasAVX] in let Predicates = [HasAVX2] in defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256, - memopv4i64, i256mem, 0>, VEX_4V; + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; let Constraints = "$src1 = $dst" in defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128, @@ -6779,34 +7002,31 @@ multiclass pseudo_pcmpistrm<string asm> { imm:$src3))]>; def MEM : PseudoI<(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 - VR128:$src1, (load addr:$src2), imm:$src3))]>; + [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>; } let Defs = [EFLAGS], usesCustomInserter = 1 in { - let AddedComplexity = 1 in - defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>; - defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>; + defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>; + defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[UseSSE42]>; } -let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in { - def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; +multiclass pcmpistrm_SS42AI<string asm> { + def rr : SS42AI<0x62, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + []>, OpSize; let mayLoad = 1 in - def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), - (ins 
VR128:$src1, i128mem:$src2, i8imm:$src3), - "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; + def rm : SS42AI<0x62, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + []>, OpSize; } let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in { - def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; - let mayLoad = 1 in - def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; + let Predicates = [HasAVX] in + defm VPCMPISTRM128 : pcmpistrm_SS42AI<"vpcmpistrm">, VEX; + defm PCMPISTRM128 : pcmpistrm_SS42AI<"pcmpistrm">; } // Packed Compare Explicit Length Strings, Return Mask @@ -6817,74 +7037,103 @@ multiclass pseudo_pcmpestrm<string asm> { VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>; def MEM : PseudoI<(outs VR128:$dst), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 - VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>; + [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, + (bc_v16i8 (memopv2i64 addr:$src3)), EDX, imm:$src5))]>; } let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { - let AddedComplexity = 1 in - defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>; - defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>; + defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>; + defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[UseSSE42]>; } -let Predicates = [HasAVX], - Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { - def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; +multiclass SS42AI_pcmpestrm<string asm> { + def rr : SS42AI<0x60, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + []>, OpSize; let mayLoad = 1 in - def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; + def rm : SS42AI<0x60, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + []>, OpSize; } let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { - def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; - let mayLoad = 1 in - def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; + let Predicates = [HasAVX] in + defm VPCMPESTRM128 : SS42AI_pcmpestrm<"vpcmpestrm">, VEX; + defm PCMPESTRM128 : SS42AI_pcmpestrm<"pcmpestrm">; } // Packed Compare Implicit Length Strings, Return Index -let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in { - multiclass SS42AI_pcmpistri<string asm> { - def rr : SS42AI<0x63, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - []>, 
OpSize; - let mayLoad = 1 in - def rm : SS42AI<0x63, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - []>, OpSize; - } +multiclass pseudo_pcmpistri<string asm> { + def REG : PseudoI<(outs GR32:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + [(set GR32:$dst, EFLAGS, + (X86pcmpistri VR128:$src1, VR128:$src2, imm:$src3))]>; + def MEM : PseudoI<(outs GR32:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + [(set GR32:$dst, EFLAGS, (X86pcmpistri VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>; } -let Predicates = [HasAVX] in -defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX; -defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">; +let Defs = [EFLAGS], usesCustomInserter = 1 in { + defm VPCMPISTRI : pseudo_pcmpistri<"#VPCMPISTRI">, Requires<[HasAVX]>; + defm PCMPISTRI : pseudo_pcmpistri<"#PCMPISTRI">, Requires<[UseSSE42]>; +} + +multiclass SS42AI_pcmpistri<string asm> { + def rr : SS42AI<0x63, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + []>, OpSize; + let mayLoad = 1 in + def rm : SS42AI<0x63, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + []>, OpSize; +} + +let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in { + let Predicates = [HasAVX] in + defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX; + defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">; +} // Packed Compare Explicit Length Strings, Return Index -let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { - multiclass SS42AI_pcmpestri<string asm> { - def rr : SS42AI<0x61, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - []>, OpSize; - let mayLoad = 1 in - def rm : SS42AI<0x61, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - []>, OpSize; - } +multiclass pseudo_pcmpestri<string asm> { + def REG : PseudoI<(outs GR32:$dst), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + [(set GR32:$dst, EFLAGS, + (X86pcmpestri VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>; + def MEM : PseudoI<(outs GR32:$dst), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + [(set GR32:$dst, EFLAGS, + (X86pcmpestri VR128:$src1, EAX, (bc_v16i8 (memopv2i64 addr:$src3)), EDX, + imm:$src5))]>; } -let Predicates = [HasAVX] in -defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX; -defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">; +let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { + defm VPCMPESTRI : pseudo_pcmpestri<"#VPCMPESTRI">, Requires<[HasAVX]>; + defm PCMPESTRI : pseudo_pcmpestri<"#PCMPESTRI">, Requires<[UseSSE42]>; +} + +multiclass SS42AI_pcmpestri<string asm> { + def rr : SS42AI<0x61, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + []>, OpSize; + let mayLoad = 1 in + def rm : SS42AI<0x61, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + []>, OpSize; +} + +let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { + let Predicates = [HasAVX] in + defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX; + defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">; +} 
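
The hunks above replace the hand-rolled PCMPISTRI/PCMPESTRI definitions with pseudo-instructions whose custom inserters expose the GR32 index result and EFLAGS through the X86pcmpistri/X86pcmpestri nodes, and fold memory operands as (bc_v16i8 (memopv2i64 addr)). At the source level these instructions are reached through the SSE4.2 string intrinsics. A minimal sketch using the real _mm_cmpistri intrinsic from <nmmintrin.h>; the string contents and variable names are illustrative only, not from the patch:

    // Build with SSE4.2 enabled (e.g. -msse4.2). _SIDD_UBYTE_OPS selects
    // unsigned byte elements; _SIDD_CMP_EQUAL_ANY returns the index of the
    // first byte of b that equals any byte of a. Both operands use the
    // implicit-length (NUL-terminated) form, which is the PCMPISTRI variant.
    #include <nmmintrin.h>
    #include <cstdio>

    int main() {
      alignas(16) char set[16] = "aeiou";            // needle set, NUL-padded
      alignas(16) char txt[16] = "xyzqrst_e_klmno";  // one 16-byte chunk
      __m128i a = _mm_load_si128(reinterpret_cast<const __m128i *>(set));
      __m128i b = _mm_load_si128(reinterpret_cast<const __m128i *>(txt));
      int idx = _mm_cmpistri(a, b, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
      std::printf("first vowel at index %d\n", idx);  // prints 8 (the 'e')
      return 0;
    }

The REG/MEM split of each pseudo mirrors the rr/rm instruction forms, so the custom inserter can pick the memory form when the second operand is a load.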
//===----------------------------------------------------------------------===// // SSE4.2 - CRC Instructions @@ -7175,27 +7424,27 @@ let ExeDomain = SSEPackedSingle in { def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem, int_x86_avx_vbroadcast_ss>; def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem, - int_x86_avx_vbroadcast_ss_256>; + int_x86_avx_vbroadcast_ss_256>, VEX_L; } let ExeDomain = SSEPackedDouble in def VBROADCASTSDYrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem, - int_x86_avx_vbroadcast_sd_256>; + int_x86_avx_vbroadcast_sd_256>, VEX_L; def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem, - int_x86_avx_vbroadcastf128_pd_256>; + int_x86_avx_vbroadcastf128_pd_256>, VEX_L; let ExeDomain = SSEPackedSingle in { def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128, int_x86_avx2_vbroadcast_ss_ps>; def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256, - int_x86_avx2_vbroadcast_ss_ps_256>; + int_x86_avx2_vbroadcast_ss_ps_256>, VEX_L; } let ExeDomain = SSEPackedDouble in def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256, - int_x86_avx2_vbroadcast_sd_pd_256>; + int_x86_avx2_vbroadcast_sd_pd_256>, VEX_L; let Predicates = [HasAVX2] in def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem, - int_x86_avx2_vbroadcasti128>; + int_x86_avx2_vbroadcasti128>, VEX_L; let Predicates = [HasAVX] in def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), @@ -7209,50 +7458,69 @@ let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in { def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR128:$src2, i8imm:$src3), "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, VEX_4V; + []>, VEX_4V, VEX_L; let mayLoad = 1 in def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f128mem:$src2, i8imm:$src3), "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, VEX_4V; + []>, VEX_4V, VEX_L; } let Predicates = [HasAVX] in { def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; + +def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (memopv4f32 addr:$src2), + (iPTR imm)), + (VINSERTF128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (memopv2f64 addr:$src2), + (iPTR imm)), + (VINSERTF128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +} + +let Predicates = [HasAVX1Only] in { def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), - (i32 imm)), + (iPTR imm)), 
(VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2), - (i32 imm)), +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2), + (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2), - (i32 imm)), +def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), + (bc_v4i32 (memopv2i64 addr:$src2)), + (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2), - (i32 imm)), +def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), + (bc_v16i8 (memopv2i64 addr:$src2)), + (iPTR imm)), + (VINSERTF128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), + (bc_v8i16 (memopv2i64 addr:$src2)), + (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; } @@ -7264,64 +7532,69 @@ let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in { def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst), (ins VR256:$src1, i8imm:$src2), "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, VEX; + []>, VEX, VEX_L; let mayStore = 1 in def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs), (ins f128mem:$dst, VR256:$src1, i8imm:$src2), "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, VEX; -} - -// Extract and store. -let Predicates = [HasAVX] in { - def : Pat<(alignedstore (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), addr:$dst), - (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; - def : Pat<(alignedstore (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), addr:$dst), - (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; - def : Pat<(alignedstore (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), addr:$dst), - (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; - - def : Pat<(int_x86_sse_storeu_ps addr:$dst, (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2)), - (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; - def : Pat<(int_x86_sse2_storeu_pd addr:$dst, (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2)), - (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; - def : Pat<(int_x86_sse2_storeu_dq addr:$dst, (bc_v16i8 (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2))), - (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; + []>, VEX, VEX_L; } // AVX1 patterns let Predicates = [HasAVX] in { -def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), - (VEXTRACTF128rr VR256:$src1, imm:$src2)>; -def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), - (VEXTRACTF128rr VR256:$src1, imm:$src2)>; -def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), - (VEXTRACTF128rr VR256:$src1, imm:$src2)>; - -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v4f32 (VEXTRACTF128rr (v8f32 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v2f64 (VEXTRACTF128rr (v4f64 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + +def : Pat<(alignedstore (v4f32 
(vextractf128_extract:$ext (v8f32 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v2f64 (vextractf128_extract:$ext (v4f64 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +} + +let Predicates = [HasAVX1Only] in { +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v2i64 (VEXTRACTF128rr - (v4i64 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4i64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v4i32 (VEXTRACTF128rr - (v8i32 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v8i32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v8i16 (VEXTRACTF128rr - (v16i16 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v16i16 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v16i8 (VEXTRACTF128rr - (v32i8 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; + (v32i8 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; + +def : Pat<(alignedstore (v2i64 (vextractf128_extract:$ext (v4i64 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v4i32 (vextractf128_extract:$ext (v8i32 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v8i16 (vextractf128_extract:$ext (v16i16 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v16i8 (vextractf128_extract:$ext (v32i8 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; } //===----------------------------------------------------------------------===// @@ -7339,7 +7612,7 @@ multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr, (ins VR256:$src1, f256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, - VEX_4V; + VEX_4V, VEX_L; def mr : AVX8I<opc_mr, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -7347,7 +7620,7 @@ multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr, def Ymr : AVX8I<opc_mr, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src1, VR256:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V; + [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L; } let ExeDomain = SSEPackedSingle in @@ -7395,13 +7668,13 @@ let ExeDomain = SSEPackedSingle in { defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, memopv2i64, int_x86_avx_vpermilvar_ps, v4f32>; defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, - memopv4i64, 
int_x86_avx_vpermilvar_ps_256, v8f32>; + memopv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>, VEX_L; } let ExeDomain = SSEPackedDouble in { defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem, memopv2i64, int_x86_avx_vpermilvar_pd, v2f64>; defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, - memopv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>; + memopv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L; } let Predicates = [HasAVX] in { @@ -7429,38 +7702,38 @@ def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2, - (i8 imm:$src3))))]>, VEX_4V; + (i8 imm:$src3))))]>, VEX_4V, VEX_L; def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, i8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv8f32 addr:$src2), - (i8 imm:$src3)))]>, VEX_4V; + (i8 imm:$src3)))]>, VEX_4V, VEX_L; } let Predicates = [HasAVX] in { +def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, + (memopv4f64 addr:$src2), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +} + +let Predicates = [HasAVX1Only] in { def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1, - (memopv8f32 addr:$src2), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2), (i8 imm:$imm))), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; -def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, - (memopv4f64 addr:$src2), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; @@ -7511,9 +7784,9 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> { let Predicates = [HasAVX, HasF16C] in { defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>; - defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>; + defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>, VEX_L; defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>; - defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>; + defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L; } //===----------------------------------------------------------------------===// @@ -7545,7 +7818,7 @@ let isCommutable = 0 in 
{ defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128, VR128, memopv2i64, i128mem>; defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256, - VR256, memopv4i64, i256mem>; + VR256, memopv4i64, i256mem>, VEX_L; } //===----------------------------------------------------------------------===// @@ -7564,11 +7837,12 @@ multiclass avx2_broadcast<bits<8> opc, string OpcodeStr, (Int128 (scalar_to_vector (ld_frag addr:$src))))]>, VEX; def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR256:$dst, (Int256 VR128:$src))]>, VEX; + [(set VR256:$dst, (Int256 VR128:$src))]>, VEX, VEX_L; def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, - (Int256 (scalar_to_vector (ld_frag addr:$src))))]>, VEX; + (Int256 (scalar_to_vector (ld_frag addr:$src))))]>, + VEX, VEX_L; } defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8, @@ -7647,19 +7921,22 @@ let Predicates = [HasAVX2] in { } // AVX1 broadcast patterns -let Predicates = [HasAVX] in { +let Predicates = [HasAVX1Only] in { def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), (VBROADCASTSSYrm addr:$src)>; def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), (VBROADCASTSDYrm addr:$src)>; +def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), + (VBROADCASTSSrm addr:$src)>; +} + +let Predicates = [HasAVX] in { def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))), (VBROADCASTSSYrm addr:$src)>; def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))), (VBROADCASTSDYrm addr:$src)>; def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))), (VBROADCASTSSrm addr:$src)>; -def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), - (VBROADCASTSSrm addr:$src)>; // Provide fallback in case the load node that is used in the patterns above // is used by additional users, which prevents the pattern selection. 
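
The predicate split above keeps the floating-point broadcast patterns under HasAVX while the integer forms move under HasAVX1Only, since with AVX2 the native VPBROADCAST instructions defined earlier handle the integer cases. For reference, the load-and-splat that VBROADCASTSSrm/VBROADCASTSSYrm match is exposed in C++ through the AVX intrinsic _mm256_broadcast_ss; a minimal sketch (values are illustrative; build with -mavx):

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      float scale = 2.5f;
      // One 32-bit load replicated into all eight lanes of a YMM register;
      // this is the splat-from-memory form VBROADCASTSSYrm implements.
      __m256 v = _mm256_broadcast_ss(&scale);
      alignas(32) float out[8];
      _mm256_store_ps(out, v);
      std::printf("%.1f %.1f\n", out[0], out[7]);  // 2.5 2.5
      return 0;
    }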
@@ -7700,7 +7977,8 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, - (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>, VEX_4V; + (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>, + VEX_4V, VEX_L; def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr, @@ -7708,7 +7986,7 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, [(set VR256:$dst, (OpVT (X86VPermv VR256:$src1, (bitconvert (mem_frag addr:$src2)))))]>, - VEX_4V; + VEX_4V, VEX_L; } defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, v8i32>; @@ -7722,14 +8000,15 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, - (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>, VEX; + (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>, + VEX, VEX_L; def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (OpVT (X86VPermi (mem_frag addr:$src1), - (i8 imm:$src2))))]>, VEX; + (i8 imm:$src2))))]>, VEX, VEX_L; } defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, v4i64>, VEX_W; @@ -7739,20 +8018,18 @@ defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, v4f64>, VEX_W; //===----------------------------------------------------------------------===// // VPERM2I128 - Permute Floating-Point Values in 128-bit chunks // -let AddedComplexity = 1 in { def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, - (i8 imm:$src3))))]>, VEX_4V; + (i8 imm:$src3))))]>, VEX_4V, VEX_L; def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, i8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2), - (i8 imm:$src3)))]>, VEX_4V; -} + (i8 imm:$src3)))]>, VEX_4V, VEX_L; -let Predicates = [HasAVX2], AddedComplexity = 1 in { +let Predicates = [HasAVX2] in { def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), @@ -7779,31 +8056,51 @@ let neverHasSideEffects = 1 in { def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR128:$src2, i8imm:$src3), "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, VEX_4V; + []>, VEX_4V, VEX_L; let mayLoad = 1 in def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i128mem:$src2, i8imm:$src3), "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, VEX_4V; + []>, VEX_4V, VEX_L; } -let Predicates = [HasAVX2], AddedComplexity = 1 in { +let Predicates = [HasAVX2] in { def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTI128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTI128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins 
(v32i8 VR256:$src1), (v16i8 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTI128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTI128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; + +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2), + (iPTR imm)), + (VINSERTI128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), + (bc_v4i32 (memopv2i64 addr:$src2)), + (iPTR imm)), + (VINSERTI128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), + (bc_v16i8 (memopv2i64 addr:$src2)), + (iPTR imm)), + (VINSERTI128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), + (bc_v8i16 (memopv2i64 addr:$src2)), + (iPTR imm)), + (VINSERTI128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; } //===----------------------------------------------------------------------===// @@ -7814,29 +8111,47 @@ def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst), "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>, - VEX; + VEX, VEX_L; let neverHasSideEffects = 1, mayStore = 1 in def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs), (ins i128mem:$dst, VR256:$src1, i8imm:$src2), - "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX; + "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + VEX, VEX_L; -let Predicates = [HasAVX2], AddedComplexity = 1 in { -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), +let Predicates = [HasAVX2] in { +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v2i64 (VEXTRACTI128rr (v4i64 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v4i32 (VEXTRACTI128rr (v8i32 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v8i16 (VEXTRACTI128rr (v16i16 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v16i8 (VEXTRACTI128rr (v32i8 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; + +def : Pat<(alignedstore (v2i64 (vextractf128_extract:$ext (v4i64 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTI128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v4i32 (vextractf128_extract:$ext (v8i32 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTI128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v8i16 (vextractf128_extract:$ext (v16i16 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTI128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v16i8 (vextractf128_extract:$ext (v32i8 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTI128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; } 
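
The new alignedstore patterns above let the common idiom "extract a 128-bit lane, then store it to aligned memory" select VEXTRACTI128mr directly, instead of a register extract followed by a separate store. A minimal sketch of that idiom with the real AVX2 intrinsics (values and names are illustrative; build with -mavx2):

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      __m256i v = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
      alignas(16) int hi[4];
      // Extract the upper 128-bit lane (immediate lane index 1)...
      __m128i upper = _mm256_extracti128_si256(v, 1);
      // ...and store it 16-byte aligned; an optimizing build can fold this
      // pair into a single vextracti128 with a memory destination.
      _mm_store_si128(reinterpret_cast<__m128i *>(hi), upper);
      std::printf("%d %d %d %d\n", hi[0], hi[1], hi[2], hi[3]);  // 4 5 6 7
      return 0;
    }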
//===----------------------------------------------------------------------===// @@ -7852,7 +8167,8 @@ multiclass avx2_pmovmask<string OpcodeStr, def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, VEX_4V; + [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, + VEX_4V, VEX_L; def mr : AVX28I<0x8e, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -7860,7 +8176,7 @@ multiclass avx2_pmovmask<string OpcodeStr, def Ymr : AVX28I<0x8e, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src1, VR256:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V; + [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L; } defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd", @@ -7898,14 +8214,14 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>, - VEX_4V; + VEX_4V, VEX_L; def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (vt256 (bitconvert (memopv4i64 addr:$src2))))))]>, - VEX_4V; + VEX_4V, VEX_L; } defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>; diff --git a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td index bdeb63f..893488c 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td +++ b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td @@ -839,6 +839,16 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem, } // Defs = [EFLAGS] +def ROT32L2R_imm8 : SDNodeXForm<imm, [{ + // Convert a ROTL shamt to a ROTR shamt on 32-bit integer. + return getI8Imm(32 - N->getZExtValue()); +}]>; + +def ROT64L2R_imm8 : SDNodeXForm<imm, [{ + // Convert a ROTL shamt to a ROTR shamt on 64-bit integer. + return getI8Imm(64 - N->getZExtValue()); +}]>; + multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> { let neverHasSideEffects = 1 in { def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2), @@ -873,4 +883,72 @@ let Predicates = [HasBMI2] in { defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, VEX_W; defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8, OpSize; defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8, OpSize, VEX_W; + + // Prefer RORX which is non-destructive and doesn't update EFLAGS. + let AddedComplexity = 10 in { + def : Pat<(rotl GR32:$src, (i8 imm:$shamt)), + (RORX32ri GR32:$src, (ROT32L2R_imm8 imm:$shamt))>; + def : Pat<(rotl GR64:$src, (i8 imm:$shamt)), + (RORX64ri GR64:$src, (ROT64L2R_imm8 imm:$shamt))>; + } + + def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)), + (RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>; + def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)), + (RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>; +
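
The ROT32L2R_imm8/ROT64L2R_imm8 transforms above rely on the identity rotl(x, k) == rotr(x, width - k), which is what lets an immediate rotate-left be selected as RORX, a rotate-right instruction. A quick self-contained check of that identity (an illustrative sketch, not part of the patch):

    #include <cstdint>
    #include <cstdio>

    static uint32_t rotl32(uint32_t x, unsigned k) {
      return (x << k) | (x >> ((32 - k) & 31));
    }
    static uint32_t rotr32(uint32_t x, unsigned k) {
      return (x >> k) | (x << ((32 - k) & 31));
    }

    int main() {
      const uint32_t x = 0x12345678u;
      for (unsigned k = 1; k < 32; ++k)         // the Pat<>s fire for imm shamts
        if (rotl32(x, k) != rotr32(x, 32 - k))  // 32 - k is ROT32L2R_imm8
          return 1;
      std::puts("rotl(x, k) == rotr(x, 32 - k) for all k in [1, 31]");
      return 0;
    }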
+ // Prefer SARX/SHRX/SHLX over SAR/SHR/SHL with variable shift BUT not + // immediate shift, i.e. the following code is considered better + // + // mov %edi, %esi + // shl $imm, %esi + // ... %edi, ... + // + // than + // + // movb $imm, %sil + // shlx %sil, %edi, %esi + // ... %edi, ... + // + let AddedComplexity = 1 in { + def : Pat<(sra GR32:$src1, GR8:$src2), + (SARX32rr GR32:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(sra GR64:$src1, GR8:$src2), + (SARX64rr GR64:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + + def : Pat<(srl GR32:$src1, GR8:$src2), + (SHRX32rr GR32:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(srl GR64:$src1, GR8:$src2), + (SHRX64rr GR64:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + + def : Pat<(shl GR32:$src1, GR8:$src2), + (SHLX32rr GR32:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(shl GR64:$src1, GR8:$src2), + (SHLX64rr GR64:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + } + + // Patterns on SARXrm/SHRXrm/SHLXrm are explicitly omitted to favor + // + // mov (%ecx), %esi + // shl $imm, %esi + // + // over + // + // movb $imm, %al + // shlx %al, (%ecx), %esi + // + // As SARXrr/SHRXrr/SHLXrr are favored for variable shifts, the peephole + // optimization will fold them into SARXrm/SHRXrm/SHLXrm if possible. } diff --git a/contrib/llvm/lib/Target/X86/X86InstrTSX.td b/contrib/llvm/lib/Target/X86/X86InstrTSX.td new file mode 100644 index 0000000..ad55058 --- /dev/null +++ b/contrib/llvm/lib/Target/X86/X86InstrTSX.td @@ -0,0 +1,32 @@ +//===-- X86InstrTSX.td - TSX Instruction Set Extension -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the instructions that make up the Intel TSX instruction
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// TSX instructions + +let usesCustomInserter = 1 in +def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins), + "# XBEGIN", [(set GR32:$dst, (int_x86_xbegin))]>, + Requires<[HasRTM]>; + +let isBranch = 1, isTerminator = 1, Defs = [EAX] in +def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget:$dst), + "xbegin\t$dst", []>; + +def XEND : I<0x01, MRM_D5, (outs), (ins), + "xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>; + +def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm), + "xabort\t$imm", + [(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>; diff --git a/contrib/llvm/lib/Target/X86/X86InstrXOP.td b/contrib/llvm/lib/Target/X86/X86InstrXOP.td index 8ec2c68..2aa08fa 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrXOP.td +++ b/contrib/llvm/lib/Target/X86/X86InstrXOP.td @@ -75,10 +75,10 @@ multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> { def rrY : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR256:$dst, (Int VR256:$src))]>, VEX; + [(set VR256:$dst, (Int VR256:$src))]>, VEX, VEX_L; def rmY : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX; + [(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX, VEX_L; } let isAsmParserOnly = 1 in { @@ -238,7 +238,7 @@ multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> { !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR256:$dst, (Int VR256:$src1, VR256:$src2, VR256:$src3))]>, - VEX_4V, VEX_I8IMM; + VEX_4V, VEX_I8IMM, VEX_L; def rmY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i256mem:$src3), !strconcat(OpcodeStr, @@ -246,7 +246,7 @@ multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> { [(set VR256:$dst, (Int VR256:$src1, VR256:$src2, (bitconvert (memopv4i64 addr:$src3))))]>, - VEX_4V, VEX_I8IMM, VEX_W, MemOp4; + VEX_4V, VEX_I8IMM, VEX_W, MemOp4, VEX_L; def mrY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, VR256:$src3), !strconcat(OpcodeStr, @@ -254,7 +254,7 @@ multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> { [(set VR256:$dst, (Int VR256:$src1, (bitconvert (memopv4i64 addr:$src2)), VR256:$src3))]>, - VEX_4V, VEX_I8IMM; + VEX_4V, VEX_I8IMM, VEX_L; } let isAsmParserOnly = 1 in { @@ -287,20 +287,21 @@ multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128, !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), [(set VR256:$dst, - (Int256 VR256:$src1, VR256:$src2, VR256:$src3, imm:$src4))]>; + (Int256 VR256:$src1, VR256:$src2, VR256:$src3, imm:$src4))]>, VEX_L; def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, f256mem:$src3, i8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2, (ld_256 addr:$src3), imm:$src4))]>, - VEX_W, MemOp4; + VEX_W, MemOp4, VEX_L; def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, VR256:$src3, i8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), [(set VR256:$dst, - (Int256 VR256:$src1, (ld_256 
addr:$src2), VR256:$src3, imm:$src4))]>; + (Int256 VR256:$src1, (ld_256 addr:$src2), VR256:$src3, imm:$src4))]>, + VEX_L; } defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", int_x86_xop_vpermil2pd, diff --git a/contrib/llvm/lib/Target/X86/X86JITInfo.cpp b/contrib/llvm/lib/Target/X86/X86JITInfo.cpp index 0168d12..764aa5d 100644 --- a/contrib/llvm/lib/Target/X86/X86JITInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86JITInfo.cpp @@ -532,6 +532,15 @@ uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) { #endif } +template<typename T> static void addUnaligned(void *Pos, T Delta) { + T Value; + std::memcpy(reinterpret_cast<char*>(&Value), reinterpret_cast<char*>(Pos), + sizeof(T)); + Value += Delta; + std::memcpy(reinterpret_cast<char*>(Pos), reinterpret_cast<char*>(&Value), + sizeof(T)); +} + /// relocate - Before the JIT can run a block of code that has been emitted, /// it must rewrite the code to contain the actual addresses of any /// referenced global symbols. @@ -545,24 +554,24 @@ void X86JITInfo::relocate(void *Function, MachineRelocation *MR, // PC relative relocation, add the relocated value to the value already in // memory, after we adjust it for where the PC is. ResultPtr = ResultPtr -(intptr_t)RelocPos - 4 - MR->getConstantVal(); - *((unsigned*)RelocPos) += (unsigned)ResultPtr; + addUnaligned<unsigned>(RelocPos, ResultPtr); break; } case X86::reloc_picrel_word: { // PIC base relative relocation, add the relocated value to the value // already in memory, after we adjust it for where the PIC base is. ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal()); - *((unsigned*)RelocPos) += (unsigned)ResultPtr; + addUnaligned<unsigned>(RelocPos, ResultPtr); break; } case X86::reloc_absolute_word: case X86::reloc_absolute_word_sext: // Absolute relocation, just add the relocated value to the value already // in memory. - *((unsigned*)RelocPos) += (unsigned)ResultPtr; + addUnaligned<unsigned>(RelocPos, ResultPtr); break; case X86::reloc_absolute_dword: - *((intptr_t*)RelocPos) += ResultPtr; + addUnaligned<intptr_t>(RelocPos, ResultPtr); break; } } diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp index 9c0ce4e..cfd68f7 100644 --- a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "X86MCInstLower.h" #include "X86AsmPrinter.h" #include "X86COFFMachineModuleInfo.h" #include "InstPrinter/X86ATTInstPrinter.h" @@ -29,6 +28,31 @@ #include "llvm/ADT/SmallString.h" using namespace llvm; +namespace { + +/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. 
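The addUnaligned() helper introduced in the X86JITInfo.cpp hunk above replaces stores of the form *((unsigned*)RelocPos) += (unsigned)ResultPtr; routing the read-modify-write through memcpy keeps it well-defined when the relocation target is not naturally aligned. Reduced to a standalone program (the buffer, offset and little-endian check are illustrative):

    #include <cstdint>
    #include <cstring>

    template <typename T> static void addUnaligned(void *Pos, T Delta) {
      T Value;
      std::memcpy(&Value, Pos, sizeof(T));   // byte-wise read, no alignment assumed
      Value += Delta;
      std::memcpy(Pos, &Value, sizeof(T));   // byte-wise write-back
    }

    int main() {
      unsigned char Code[8] = {0};
      addUnaligned<uint32_t>(Code + 1, 0x1000);  // patch a 32-bit field at an odd address
      return Code[2] == 0x10 ? 0 : 1;            // 0x1000 little-endian: 00 10 00 00
    }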
+class X86MCInstLower { + MCContext &Ctx; + Mangler *Mang; + const MachineFunction &MF; + const TargetMachine &TM; + const MCAsmInfo &MAI; + X86AsmPrinter &AsmPrinter; +public: + X86MCInstLower(Mangler *mang, const MachineFunction &MF, + X86AsmPrinter &asmprinter); + + void Lower(const MachineInstr *MI, MCInst &OutMI) const; + + MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; + MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; + +private: + MachineModuleInfoMachO &getMachOMMI() const; +}; + +} // end anonymous namespace + X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf, X86AsmPrinter &asmprinter) : Ctx(mf.getContext()), Mang(mang), MF(mf), TM(mf.getTarget()), @@ -43,15 +67,11 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { /// operand to an MCSymbol. MCSymbol *X86MCInstLower:: GetSymbolFromOperand(const MachineOperand &MO) const { - assert((MO.isGlobal() || MO.isSymbol()) && "Isn't a symbol reference"); + assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference"); SmallString<128> Name; - if (!MO.isGlobal()) { - assert(MO.isSymbol()); - Name += MAI.getGlobalPrefix(); - Name += MO.getSymbolName(); - } else { + if (MO.isGlobal()) { const GlobalValue *GV = MO.getGlobal(); bool isImplicitlyPrivate = false; if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB || @@ -61,6 +81,11 @@ GetSymbolFromOperand(const MachineOperand &MO) const { isImplicitlyPrivate = true; Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate); + } else if (MO.isSymbol()) { + Name += MAI.getGlobalPrefix(); + Name += MO.getSymbolName(); + } else if (MO.isMBB()) { + Name += MO.getMBB()->getSymbol()->getName(); } // If the target flags on the operand changes the name of the symbol, do that @@ -191,7 +216,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, if (Expr == 0) Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx); - if (!MO.isJTI() && MO.getOffset()) + if (!MO.isJTI() && !MO.isMBB() && MO.getOffset()) Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx); @@ -324,9 +349,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { MCOp = MCOperand::CreateImm(MO.getImm()); break; case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( - MO.getMBB()->getSymbol(), Ctx)); - break; case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO)); @@ -371,18 +393,8 @@ ReSimplify: case X86::MOVZX64rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break; case X86::MOVZX64rr16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr16); break; case X86::MOVZX64rm16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm16); break; - case X86::SETB_C8r: LowerUnaryToTwoAddr(OutMI, X86::SBB8rr); break; - case X86::SETB_C16r: LowerUnaryToTwoAddr(OutMI, X86::SBB16rr); break; - case X86::SETB_C32r: LowerUnaryToTwoAddr(OutMI, X86::SBB32rr); break; - case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break; case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break; case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break; - case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break; - case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break; - case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break; - case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break; - case 
X86::AVX2_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDYrr);break; - case X86::AVX2_SET0: LowerUnaryToTwoAddr(OutMI, X86::VPXORYrr); break; case X86::MOV16r0: LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0 diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.h b/contrib/llvm/lib/Target/X86/X86MCInstLower.h deleted file mode 100644 index b4d4cfd..0000000 --- a/contrib/llvm/lib/Target/X86/X86MCInstLower.h +++ /dev/null @@ -1,52 +0,0 @@ -//===-- X86MCInstLower.h - Lower MachineInstr to MCInst ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef X86_MCINSTLOWER_H -#define X86_MCINSTLOWER_H - -#include "llvm/Support/Compiler.h" - -namespace llvm { - class MCAsmInfo; - class MCContext; - class MCInst; - class MCOperand; - class MCSymbol; - class MachineInstr; - class MachineFunction; - class MachineModuleInfoMachO; - class MachineOperand; - class Mangler; - class TargetMachine; - class X86AsmPrinter; - -/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. -class LLVM_LIBRARY_VISIBILITY X86MCInstLower { - MCContext &Ctx; - Mangler *Mang; - const MachineFunction &MF; - const TargetMachine &TM; - const MCAsmInfo &MAI; - X86AsmPrinter &AsmPrinter; -public: - X86MCInstLower(Mangler *mang, const MachineFunction &MF, - X86AsmPrinter &asmprinter); - - void Lower(const MachineInstr *MI, MCInst &OutMI) const; - - MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; - MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; - -private: - MachineModuleInfoMachO &getMachOMMI() const; -}; - -} - -#endif diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp index 877b8f6..73ac747 100644 --- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -106,23 +106,7 @@ X86RegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { int X86RegisterInfo::getSEHRegNum(unsigned i) const { - int reg = X86_MC::getX86RegNum(i); - switch (i) { - case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B: - case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B: - case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B: - case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B: - case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B: - case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B: - case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B: - case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B: - case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: - case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: - case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11: - case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15: - reg += 8; - } - return reg; + return getEncodingValue(i); } const TargetRegisterClass * @@ -245,15 +229,26 @@ const uint16_t * X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { bool callsEHReturn = false; bool ghcCall = false; + bool oclBiCall = false; + bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); if (MF) { callsEHReturn = MF->getMMI().callsEHReturn(); const Function *F = MF->getFunction(); ghcCall = (F ? 
F->getCallingConv() == CallingConv::GHC : false); + oclBiCall = (F ? F->getCallingConv() == CallingConv::Intel_OCL_BI : false); } if (ghcCall) return CSR_NoRegs_SaveList; + if (oclBiCall) { + if (HasAVX && IsWin64) + return CSR_Win64_Intel_OCL_BI_AVX_SaveList; + if (HasAVX && Is64Bit) + return CSR_64_Intel_OCL_BI_AVX_SaveList; + if (!HasAVX && !IsWin64 && Is64Bit) + return CSR_64_Intel_OCL_BI_SaveList; + } if (Is64Bit) { if (IsWin64) return CSR_Win64_SaveList; @@ -268,6 +263,16 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const uint32_t* X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { + bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); + + if (CC == CallingConv::Intel_OCL_BI) { + if (IsWin64 && HasAVX) + return CSR_Win64_Intel_OCL_BI_AVX_RegMask; + if (Is64Bit && HasAVX) + return CSR_64_Intel_OCL_BI_AVX_RegMask; + if (!HasAVX && !IsWin64 && Is64Bit) + return CSR_64_Intel_OCL_BI_RegMask; + } if (CC == CallingConv::GHC) return CSR_NoRegs_RegMask; if (!Is64Bit) @@ -277,6 +282,11 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { return CSR_64_RegMask; } +const uint32_t* +X86RegisterInfo::getNoPreservedMask() const { + return CSR_NoRegs_RegMask; +} + BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); @@ -398,8 +408,9 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); - bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || - F->hasFnAttr(Attribute::StackAlignment)); + bool requiresRealignment = + ((MFI->getMaxAlignment() > StackAlign) || + F->getFnAttributes().hasAttribute(Attributes::StackAlignment)); // If we've requested that we force align the stack do so now. 
if (ForceStackAlign) @@ -522,7 +533,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const{ + int SPAdj, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; @@ -590,9 +601,10 @@ unsigned X86RegisterInfo::getEHHandlerRegister() const { } namespace llvm { -unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { - switch (VT.getSimpleVT().SimpleTy) { - default: return Reg; +unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, + bool High) { + switch (VT) { + default: llvm_unreachable("Unexpected VT"); case MVT::i8: if (High) { switch (Reg) { @@ -608,7 +620,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { } } else { switch (Reg) { - default: return 0; + default: llvm_unreachable("Unexpected register"); case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::AL; case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: @@ -645,7 +657,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { } case MVT::i16: switch (Reg) { - default: return Reg; + default: llvm_unreachable("Unexpected register"); case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::AX; case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: @@ -681,7 +693,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { } case MVT::i32: switch (Reg) { - default: return Reg; + default: llvm_unreachable("Unexpected register"); case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::EAX; case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: @@ -733,7 +745,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { } } switch (Reg) { - default: return Reg; + default: llvm_unreachable("Unexpected register"); case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::RAX; case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h index 1bc32cb..7932ede 100644 --- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h +++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h @@ -58,10 +58,6 @@ private: public: X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii); - /// getX86RegNum - Returns the native X86 register number for the given LLVM - /// register identifier. - static unsigned getX86RegNum(unsigned RegNo); - // FIXME: This should be tablegen'd like getDwarfRegNum is int getSEHRegNum(unsigned i) const; @@ -104,6 +100,7 @@ public: /// callee-save registers on this target. const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID) const; + const uint32_t *getNoPreservedMask() const; /// getReservedRegs - Returns a bitset indexed by physical register number /// indicating if a register is a special register that has particular uses and @@ -141,8 +138,8 @@ public: // getX86SubSuperRegister - X86 utility function. It returns the sub or super // register of a specific X86 register. -// e.g. getX86SubSuperRegister(X86::EAX, EVT::i16) return X86:AX -unsigned getX86SubSuperRegister(unsigned, EVT, bool High=false); +// e.g. 
getX86SubSuperRegister(X86::EAX, MVT::i16) returns X86::AX +unsigned getX86SubSuperRegister(unsigned, MVT::SimpleValueType, bool High=false); } // End llvm namespace diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td index edc7184..be6282a 100644 --- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td +++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td @@ -13,258 +13,264 @@ // //===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// Register definitions... -// -let Namespace = "X86" in { +class X86Reg<string n, bits<16> Enc, list<Register> subregs = []> : Register<n> { + let Namespace = "X86"; + let HWEncoding = Enc; + let SubRegs = subregs; +} - // Subregister indices. +// Subregister indices. +let Namespace = "X86" in { def sub_8bit : SubRegIndex; def sub_8bit_hi : SubRegIndex; def sub_16bit : SubRegIndex; def sub_32bit : SubRegIndex; - def sub_xmm : SubRegIndex; - - - // In the register alias definitions below, we define which registers alias - // which others. We only specify which registers the small registers alias, - // because the register file generator is smart enough to figure out that - // AL aliases AX if we tell it that AX aliased AL (for example). - - // Dwarf numbering is different for 32-bit and 64-bit, and there are - // variations by target as well. Currently the first entry is for X86-64, - // second - for EH on X86-32/Darwin and third is 'generic' one (X86-32/Linux - // and debug information on X86-32/Darwin) - - // 8-bit registers - // Low registers - def AL : Register<"al">; - def DL : Register<"dl">; - def CL : Register<"cl">; - def BL : Register<"bl">; - - // X86-64 only, requires REX. - let CostPerUse = 1 in { - def SIL : Register<"sil">; - def DIL : Register<"dil">; - def BPL : Register<"bpl">; - def SPL : Register<"spl">; - def R8B : Register<"r8b">; - def R9B : Register<"r9b">; - def R10B : Register<"r10b">; - def R11B : Register<"r11b">; - def R12B : Register<"r12b">; - def R13B : Register<"r13b">; - def R14B : Register<"r14b">; - def R15B : Register<"r15b">; - } - - // High registers. On x86-64, these cannot be used in any instruction - // with a REX prefix. - def AH : Register<"ah">; - def DH : Register<"dh">; - def CH : Register<"ch">; - def BH : Register<"bh">; - - // 16-bit registers - let SubRegIndices = [sub_8bit, sub_8bit_hi], CoveredBySubRegs = 1 in { - def AX : RegisterWithSubRegs<"ax", [AL,AH]>; - def DX : RegisterWithSubRegs<"dx", [DL,DH]>; - def CX : RegisterWithSubRegs<"cx", [CL,CH]>; - def BX : RegisterWithSubRegs<"bx", [BL,BH]>; - } - let SubRegIndices = [sub_8bit] in { - def SI : RegisterWithSubRegs<"si", [SIL]>; - def DI : RegisterWithSubRegs<"di", [DIL]>; - def BP : RegisterWithSubRegs<"bp", [BPL]>; - def SP : RegisterWithSubRegs<"sp", [SPL]>; - } - def IP : Register<"ip">; - - // X86-64 only, requires REX. 
- let SubRegIndices = [sub_8bit], CostPerUse = 1 in { - def R8W : RegisterWithSubRegs<"r8w", [R8B]>; - def R9W : RegisterWithSubRegs<"r9w", [R9B]>; - def R10W : RegisterWithSubRegs<"r10w", [R10B]>; - def R11W : RegisterWithSubRegs<"r11w", [R11B]>; - def R12W : RegisterWithSubRegs<"r12w", [R12B]>; - def R13W : RegisterWithSubRegs<"r13w", [R13B]>; - def R14W : RegisterWithSubRegs<"r14w", [R14B]>; - def R15W : RegisterWithSubRegs<"r15w", [R15B]>; - } - // 32-bit registers - let SubRegIndices = [sub_16bit] in { - def EAX : RegisterWithSubRegs<"eax", [AX]>, DwarfRegNum<[-2, 0, 0]>; - def EDX : RegisterWithSubRegs<"edx", [DX]>, DwarfRegNum<[-2, 2, 2]>; - def ECX : RegisterWithSubRegs<"ecx", [CX]>, DwarfRegNum<[-2, 1, 1]>; - def EBX : RegisterWithSubRegs<"ebx", [BX]>, DwarfRegNum<[-2, 3, 3]>; - def ESI : RegisterWithSubRegs<"esi", [SI]>, DwarfRegNum<[-2, 6, 6]>; - def EDI : RegisterWithSubRegs<"edi", [DI]>, DwarfRegNum<[-2, 7, 7]>; - def EBP : RegisterWithSubRegs<"ebp", [BP]>, DwarfRegNum<[-2, 4, 5]>; - def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[-2, 5, 4]>; - def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[-2, 8, 8]>; - - // X86-64 only, requires REX - let CostPerUse = 1 in { - def R8D : RegisterWithSubRegs<"r8d", [R8W]>; - def R9D : RegisterWithSubRegs<"r9d", [R9W]>; - def R10D : RegisterWithSubRegs<"r10d", [R10W]>; - def R11D : RegisterWithSubRegs<"r11d", [R11W]>; - def R12D : RegisterWithSubRegs<"r12d", [R12W]>; - def R13D : RegisterWithSubRegs<"r13d", [R13W]>; - def R14D : RegisterWithSubRegs<"r14d", [R14W]>; - def R15D : RegisterWithSubRegs<"r15d", [R15W]>; - }} - - // 64-bit registers, X86-64 only - let SubRegIndices = [sub_32bit] in { - def RAX : RegisterWithSubRegs<"rax", [EAX]>, DwarfRegNum<[0, -2, -2]>; - def RDX : RegisterWithSubRegs<"rdx", [EDX]>, DwarfRegNum<[1, -2, -2]>; - def RCX : RegisterWithSubRegs<"rcx", [ECX]>, DwarfRegNum<[2, -2, -2]>; - def RBX : RegisterWithSubRegs<"rbx", [EBX]>, DwarfRegNum<[3, -2, -2]>; - def RSI : RegisterWithSubRegs<"rsi", [ESI]>, DwarfRegNum<[4, -2, -2]>; - def RDI : RegisterWithSubRegs<"rdi", [EDI]>, DwarfRegNum<[5, -2, -2]>; - def RBP : RegisterWithSubRegs<"rbp", [EBP]>, DwarfRegNum<[6, -2, -2]>; - def RSP : RegisterWithSubRegs<"rsp", [ESP]>, DwarfRegNum<[7, -2, -2]>; - - // These also require REX. - let CostPerUse = 1 in { - def R8 : RegisterWithSubRegs<"r8", [R8D]>, DwarfRegNum<[8, -2, -2]>; - def R9 : RegisterWithSubRegs<"r9", [R9D]>, DwarfRegNum<[9, -2, -2]>; - def R10 : RegisterWithSubRegs<"r10", [R10D]>, DwarfRegNum<[10, -2, -2]>; - def R11 : RegisterWithSubRegs<"r11", [R11D]>, DwarfRegNum<[11, -2, -2]>; - def R12 : RegisterWithSubRegs<"r12", [R12D]>, DwarfRegNum<[12, -2, -2]>; - def R13 : RegisterWithSubRegs<"r13", [R13D]>, DwarfRegNum<[13, -2, -2]>; - def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>; - def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>; - def RIP : RegisterWithSubRegs<"rip", [EIP]>, DwarfRegNum<[16, -2, -2]>; - }} - - // MMX Registers. These are actually aliased to ST0 .. 
ST7 - def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>; - def MM1 : Register<"mm1">, DwarfRegNum<[42, 30, 30]>; - def MM2 : Register<"mm2">, DwarfRegNum<[43, 31, 31]>; - def MM3 : Register<"mm3">, DwarfRegNum<[44, 32, 32]>; - def MM4 : Register<"mm4">, DwarfRegNum<[45, 33, 33]>; - def MM5 : Register<"mm5">, DwarfRegNum<[46, 34, 34]>; - def MM6 : Register<"mm6">, DwarfRegNum<[47, 35, 35]>; - def MM7 : Register<"mm7">, DwarfRegNum<[48, 36, 36]>; - - // Pseudo Floating Point registers - def FP0 : Register<"fp0">; - def FP1 : Register<"fp1">; - def FP2 : Register<"fp2">; - def FP3 : Register<"fp3">; - def FP4 : Register<"fp4">; - def FP5 : Register<"fp5">; - def FP6 : Register<"fp6">; - - // XMM Registers, used by the various SSE instruction set extensions. - def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>; - def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>; - def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>; - def XMM3: Register<"xmm3">, DwarfRegNum<[20, 24, 24]>; - def XMM4: Register<"xmm4">, DwarfRegNum<[21, 25, 25]>; - def XMM5: Register<"xmm5">, DwarfRegNum<[22, 26, 26]>; - def XMM6: Register<"xmm6">, DwarfRegNum<[23, 27, 27]>; - def XMM7: Register<"xmm7">, DwarfRegNum<[24, 28, 28]>; - - // X86-64 only - let CostPerUse = 1 in { - def XMM8: Register<"xmm8">, DwarfRegNum<[25, -2, -2]>; - def XMM9: Register<"xmm9">, DwarfRegNum<[26, -2, -2]>; - def XMM10: Register<"xmm10">, DwarfRegNum<[27, -2, -2]>; - def XMM11: Register<"xmm11">, DwarfRegNum<[28, -2, -2]>; - def XMM12: Register<"xmm12">, DwarfRegNum<[29, -2, -2]>; - def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>; - def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>; - def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>; - } // CostPerUse - - // YMM Registers, used by AVX instructions - let SubRegIndices = [sub_xmm] in { - def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegAlias<XMM0>; - def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegAlias<XMM1>; - def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegAlias<XMM2>; - def YMM3: RegisterWithSubRegs<"ymm3", [XMM3]>, DwarfRegAlias<XMM3>; - def YMM4: RegisterWithSubRegs<"ymm4", [XMM4]>, DwarfRegAlias<XMM4>; - def YMM5: RegisterWithSubRegs<"ymm5", [XMM5]>, DwarfRegAlias<XMM5>; - def YMM6: RegisterWithSubRegs<"ymm6", [XMM6]>, DwarfRegAlias<XMM6>; - def YMM7: RegisterWithSubRegs<"ymm7", [XMM7]>, DwarfRegAlias<XMM7>; - def YMM8: RegisterWithSubRegs<"ymm8", [XMM8]>, DwarfRegAlias<XMM8>; - def YMM9: RegisterWithSubRegs<"ymm9", [XMM9]>, DwarfRegAlias<XMM9>; - def YMM10: RegisterWithSubRegs<"ymm10", [XMM10]>, DwarfRegAlias<XMM10>; - def YMM11: RegisterWithSubRegs<"ymm11", [XMM11]>, DwarfRegAlias<XMM11>; - def YMM12: RegisterWithSubRegs<"ymm12", [XMM12]>, DwarfRegAlias<XMM12>; - def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegAlias<XMM13>; - def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegAlias<XMM14>; - def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegAlias<XMM15>; - } - - class STRegister<string Name, list<Register> A> : Register<Name> { - let Aliases = A; - } - - // Floating point stack registers. These don't map one-to-one to the FP - // pseudo registers, but we still mark them as aliasing FP registers. That - // way both kinds can be live without exceeding the stack depth. ST registers - // are only live around inline assembly. 
- def ST0 : STRegister<"st(0)", []>, DwarfRegNum<[33, 12, 11]>; - def ST1 : STRegister<"st(1)", [FP6]>, DwarfRegNum<[34, 13, 12]>; - def ST2 : STRegister<"st(2)", [FP5]>, DwarfRegNum<[35, 14, 13]>; - def ST3 : STRegister<"st(3)", [FP4]>, DwarfRegNum<[36, 15, 14]>; - def ST4 : STRegister<"st(4)", [FP3]>, DwarfRegNum<[37, 16, 15]>; - def ST5 : STRegister<"st(5)", [FP2]>, DwarfRegNum<[38, 17, 16]>; - def ST6 : STRegister<"st(6)", [FP1]>, DwarfRegNum<[39, 18, 17]>; - def ST7 : STRegister<"st(7)", [FP0]>, DwarfRegNum<[40, 19, 18]>; - - // Floating-point status word - def FPSW : Register<"fpsw">; - - // Status flags register - def EFLAGS : Register<"flags">; - - // Segment registers - def CS : Register<"cs">; - def DS : Register<"ds">; - def SS : Register<"ss">; - def ES : Register<"es">; - def FS : Register<"fs">; - def GS : Register<"gs">; - - // Debug registers - def DR0 : Register<"dr0">; - def DR1 : Register<"dr1">; - def DR2 : Register<"dr2">; - def DR3 : Register<"dr3">; - def DR4 : Register<"dr4">; - def DR5 : Register<"dr5">; - def DR6 : Register<"dr6">; - def DR7 : Register<"dr7">; - - // Control registers - def CR0 : Register<"cr0">; - def CR1 : Register<"cr1">; - def CR2 : Register<"cr2">; - def CR3 : Register<"cr3">; - def CR4 : Register<"cr4">; - def CR5 : Register<"cr5">; - def CR6 : Register<"cr6">; - def CR7 : Register<"cr7">; - def CR8 : Register<"cr8">; - def CR9 : Register<"cr9">; - def CR10 : Register<"cr10">; - def CR11 : Register<"cr11">; - def CR12 : Register<"cr12">; - def CR13 : Register<"cr13">; - def CR14 : Register<"cr14">; - def CR15 : Register<"cr15">; - - // Pseudo index registers - def EIZ : Register<"eiz">; - def RIZ : Register<"riz">; + def sub_xmm : SubRegIndex; } +//===----------------------------------------------------------------------===// +// Register definitions... +// + +// In the register alias definitions below, we define which registers alias +// which others. We only specify which registers the small registers alias, +// because the register file generator is smart enough to figure out that +// AL aliases AX if we tell it that AX aliased AL (for example). + +// Dwarf numbering is different for 32-bit and 64-bit, and there are +// variations by target as well. Currently the first entry is for X86-64, +// second - for EH on X86-32/Darwin and third is 'generic' one (X86-32/Linux +// and debug information on X86-32/Darwin) + +// 8-bit registers +// Low registers +def AL : X86Reg<"al", 0>; +def DL : X86Reg<"dl", 2>; +def CL : X86Reg<"cl", 1>; +def BL : X86Reg<"bl", 3>; + +// High registers. On x86-64, these cannot be used in any instruction +// with a REX prefix. +def AH : X86Reg<"ah", 4>; +def DH : X86Reg<"dh", 6>; +def CH : X86Reg<"ch", 5>; +def BH : X86Reg<"bh", 7>; + +// X86-64 only, requires REX. 
+let CostPerUse = 1 in { +def SIL : X86Reg<"sil", 6>; +def DIL : X86Reg<"dil", 7>; +def BPL : X86Reg<"bpl", 5>; +def SPL : X86Reg<"spl", 4>; +def R8B : X86Reg<"r8b", 8>; +def R9B : X86Reg<"r9b", 9>; +def R10B : X86Reg<"r10b", 10>; +def R11B : X86Reg<"r11b", 11>; +def R12B : X86Reg<"r12b", 12>; +def R13B : X86Reg<"r13b", 13>; +def R14B : X86Reg<"r14b", 14>; +def R15B : X86Reg<"r15b", 15>; +} + +// 16-bit registers +let SubRegIndices = [sub_8bit, sub_8bit_hi], CoveredBySubRegs = 1 in { +def AX : X86Reg<"ax", 0, [AL,AH]>; +def DX : X86Reg<"dx", 2, [DL,DH]>; +def CX : X86Reg<"cx", 1, [CL,CH]>; +def BX : X86Reg<"bx", 3, [BL,BH]>; +} +let SubRegIndices = [sub_8bit] in { +def SI : X86Reg<"si", 6, [SIL]>; +def DI : X86Reg<"di", 7, [DIL]>; +def BP : X86Reg<"bp", 5, [BPL]>; +def SP : X86Reg<"sp", 4, [SPL]>; +} +def IP : X86Reg<"ip", 0>; + +// X86-64 only, requires REX. +let SubRegIndices = [sub_8bit], CostPerUse = 1 in { +def R8W : X86Reg<"r8w", 8, [R8B]>; +def R9W : X86Reg<"r9w", 9, [R9B]>; +def R10W : X86Reg<"r10w", 10, [R10B]>; +def R11W : X86Reg<"r11w", 11, [R11B]>; +def R12W : X86Reg<"r12w", 12, [R12B]>; +def R13W : X86Reg<"r13w", 13, [R13B]>; +def R14W : X86Reg<"r14w", 14, [R14B]>; +def R15W : X86Reg<"r15w", 15, [R15B]>; +} + +// 32-bit registers +let SubRegIndices = [sub_16bit] in { +def EAX : X86Reg<"eax", 0, [AX]>, DwarfRegNum<[-2, 0, 0]>; +def EDX : X86Reg<"edx", 2, [DX]>, DwarfRegNum<[-2, 2, 2]>; +def ECX : X86Reg<"ecx", 1, [CX]>, DwarfRegNum<[-2, 1, 1]>; +def EBX : X86Reg<"ebx", 3, [BX]>, DwarfRegNum<[-2, 3, 3]>; +def ESI : X86Reg<"esi", 6, [SI]>, DwarfRegNum<[-2, 6, 6]>; +def EDI : X86Reg<"edi", 7, [DI]>, DwarfRegNum<[-2, 7, 7]>; +def EBP : X86Reg<"ebp", 5, [BP]>, DwarfRegNum<[-2, 4, 5]>; +def ESP : X86Reg<"esp", 4, [SP]>, DwarfRegNum<[-2, 5, 4]>; +def EIP : X86Reg<"eip", 0, [IP]>, DwarfRegNum<[-2, 8, 8]>; + +// X86-64 only, requires REX +let CostPerUse = 1 in { +def R8D : X86Reg<"r8d", 8, [R8W]>; +def R9D : X86Reg<"r9d", 9, [R9W]>; +def R10D : X86Reg<"r10d", 10, [R10W]>; +def R11D : X86Reg<"r11d", 11, [R11W]>; +def R12D : X86Reg<"r12d", 12, [R12W]>; +def R13D : X86Reg<"r13d", 13, [R13W]>; +def R14D : X86Reg<"r14d", 14, [R14W]>; +def R15D : X86Reg<"r15d", 15, [R15W]>; +}} + +// 64-bit registers, X86-64 only +let SubRegIndices = [sub_32bit] in { +def RAX : X86Reg<"rax", 0, [EAX]>, DwarfRegNum<[0, -2, -2]>; +def RDX : X86Reg<"rdx", 2, [EDX]>, DwarfRegNum<[1, -2, -2]>; +def RCX : X86Reg<"rcx", 1, [ECX]>, DwarfRegNum<[2, -2, -2]>; +def RBX : X86Reg<"rbx", 3, [EBX]>, DwarfRegNum<[3, -2, -2]>; +def RSI : X86Reg<"rsi", 6, [ESI]>, DwarfRegNum<[4, -2, -2]>; +def RDI : X86Reg<"rdi", 7, [EDI]>, DwarfRegNum<[5, -2, -2]>; +def RBP : X86Reg<"rbp", 5, [EBP]>, DwarfRegNum<[6, -2, -2]>; +def RSP : X86Reg<"rsp", 4, [ESP]>, DwarfRegNum<[7, -2, -2]>; + +// These also require REX. +let CostPerUse = 1 in { +def R8 : X86Reg<"r8", 8, [R8D]>, DwarfRegNum<[ 8, -2, -2]>; +def R9 : X86Reg<"r9", 9, [R9D]>, DwarfRegNum<[ 9, -2, -2]>; +def R10 : X86Reg<"r10", 10, [R10D]>, DwarfRegNum<[10, -2, -2]>; +def R11 : X86Reg<"r11", 11, [R11D]>, DwarfRegNum<[11, -2, -2]>; +def R12 : X86Reg<"r12", 12, [R12D]>, DwarfRegNum<[12, -2, -2]>; +def R13 : X86Reg<"r13", 13, [R13D]>, DwarfRegNum<[13, -2, -2]>; +def R14 : X86Reg<"r14", 14, [R14D]>, DwarfRegNum<[14, -2, -2]>; +def R15 : X86Reg<"r15", 15, [R15D]>, DwarfRegNum<[15, -2, -2]>; +def RIP : X86Reg<"rip", 0, [EIP]>, DwarfRegNum<[16, -2, -2]>; +}} + +// MMX Registers. These are actually aliased to ST0 .. 
ST7 +def MM0 : X86Reg<"mm0", 0>, DwarfRegNum<[41, 29, 29]>; +def MM1 : X86Reg<"mm1", 1>, DwarfRegNum<[42, 30, 30]>; +def MM2 : X86Reg<"mm2", 2>, DwarfRegNum<[43, 31, 31]>; +def MM3 : X86Reg<"mm3", 3>, DwarfRegNum<[44, 32, 32]>; +def MM4 : X86Reg<"mm4", 4>, DwarfRegNum<[45, 33, 33]>; +def MM5 : X86Reg<"mm5", 5>, DwarfRegNum<[46, 34, 34]>; +def MM6 : X86Reg<"mm6", 6>, DwarfRegNum<[47, 35, 35]>; +def MM7 : X86Reg<"mm7", 7>, DwarfRegNum<[48, 36, 36]>; + +// Pseudo Floating Point registers +def FP0 : X86Reg<"fp0", 0>; +def FP1 : X86Reg<"fp1", 0>; +def FP2 : X86Reg<"fp2", 0>; +def FP3 : X86Reg<"fp3", 0>; +def FP4 : X86Reg<"fp4", 0>; +def FP5 : X86Reg<"fp5", 0>; +def FP6 : X86Reg<"fp6", 0>; + +// XMM Registers, used by the various SSE instruction set extensions. +def XMM0: X86Reg<"xmm0", 0>, DwarfRegNum<[17, 21, 21]>; +def XMM1: X86Reg<"xmm1", 1>, DwarfRegNum<[18, 22, 22]>; +def XMM2: X86Reg<"xmm2", 2>, DwarfRegNum<[19, 23, 23]>; +def XMM3: X86Reg<"xmm3", 3>, DwarfRegNum<[20, 24, 24]>; +def XMM4: X86Reg<"xmm4", 4>, DwarfRegNum<[21, 25, 25]>; +def XMM5: X86Reg<"xmm5", 5>, DwarfRegNum<[22, 26, 26]>; +def XMM6: X86Reg<"xmm6", 6>, DwarfRegNum<[23, 27, 27]>; +def XMM7: X86Reg<"xmm7", 7>, DwarfRegNum<[24, 28, 28]>; + +// X86-64 only +let CostPerUse = 1 in { +def XMM8: X86Reg<"xmm8", 8>, DwarfRegNum<[25, -2, -2]>; +def XMM9: X86Reg<"xmm9", 9>, DwarfRegNum<[26, -2, -2]>; +def XMM10: X86Reg<"xmm10", 10>, DwarfRegNum<[27, -2, -2]>; +def XMM11: X86Reg<"xmm11", 11>, DwarfRegNum<[28, -2, -2]>; +def XMM12: X86Reg<"xmm12", 12>, DwarfRegNum<[29, -2, -2]>; +def XMM13: X86Reg<"xmm13", 13>, DwarfRegNum<[30, -2, -2]>; +def XMM14: X86Reg<"xmm14", 14>, DwarfRegNum<[31, -2, -2]>; +def XMM15: X86Reg<"xmm15", 15>, DwarfRegNum<[32, -2, -2]>; +} // CostPerUse + +// YMM Registers, used by AVX instructions +let SubRegIndices = [sub_xmm] in { +def YMM0: X86Reg<"ymm0", 0, [XMM0]>, DwarfRegAlias<XMM0>; +def YMM1: X86Reg<"ymm1", 1, [XMM1]>, DwarfRegAlias<XMM1>; +def YMM2: X86Reg<"ymm2", 2, [XMM2]>, DwarfRegAlias<XMM2>; +def YMM3: X86Reg<"ymm3", 3, [XMM3]>, DwarfRegAlias<XMM3>; +def YMM4: X86Reg<"ymm4", 4, [XMM4]>, DwarfRegAlias<XMM4>; +def YMM5: X86Reg<"ymm5", 5, [XMM5]>, DwarfRegAlias<XMM5>; +def YMM6: X86Reg<"ymm6", 6, [XMM6]>, DwarfRegAlias<XMM6>; +def YMM7: X86Reg<"ymm7", 7, [XMM7]>, DwarfRegAlias<XMM7>; +def YMM8: X86Reg<"ymm8", 8, [XMM8]>, DwarfRegAlias<XMM8>; +def YMM9: X86Reg<"ymm9", 9, [XMM9]>, DwarfRegAlias<XMM9>; +def YMM10: X86Reg<"ymm10", 10, [XMM10]>, DwarfRegAlias<XMM10>; +def YMM11: X86Reg<"ymm11", 11, [XMM11]>, DwarfRegAlias<XMM11>; +def YMM12: X86Reg<"ymm12", 12, [XMM12]>, DwarfRegAlias<XMM12>; +def YMM13: X86Reg<"ymm13", 13, [XMM13]>, DwarfRegAlias<XMM13>; +def YMM14: X86Reg<"ymm14", 14, [XMM14]>, DwarfRegAlias<XMM14>; +def YMM15: X86Reg<"ymm15", 15, [XMM15]>, DwarfRegAlias<XMM15>; +} + +class STRegister<string n, bits<16> Enc, list<Register> A> : X86Reg<n, Enc> { + let Aliases = A; +} + +// Floating point stack registers. These don't map one-to-one to the FP +// pseudo registers, but we still mark them as aliasing FP registers. That +// way both kinds can be live without exceeding the stack depth. ST registers +// are only live around inline assembly. 
+def ST0 : STRegister<"st(0)", 0, []>, DwarfRegNum<[33, 12, 11]>; +def ST1 : STRegister<"st(1)", 1, [FP6]>, DwarfRegNum<[34, 13, 12]>; +def ST2 : STRegister<"st(2)", 2, [FP5]>, DwarfRegNum<[35, 14, 13]>; +def ST3 : STRegister<"st(3)", 3, [FP4]>, DwarfRegNum<[36, 15, 14]>; +def ST4 : STRegister<"st(4)", 4, [FP3]>, DwarfRegNum<[37, 16, 15]>; +def ST5 : STRegister<"st(5)", 5, [FP2]>, DwarfRegNum<[38, 17, 16]>; +def ST6 : STRegister<"st(6)", 6, [FP1]>, DwarfRegNum<[39, 18, 17]>; +def ST7 : STRegister<"st(7)", 7, [FP0]>, DwarfRegNum<[40, 19, 18]>; + +// Floating-point status word +def FPSW : X86Reg<"fpsw", 0>; + +// Status flags register +def EFLAGS : X86Reg<"flags", 0>; + +// Segment registers +def CS : X86Reg<"cs", 1>; +def DS : X86Reg<"ds", 3>; +def SS : X86Reg<"ss", 2>; +def ES : X86Reg<"es", 0>; +def FS : X86Reg<"fs", 4>; +def GS : X86Reg<"gs", 5>; + +// Debug registers +def DR0 : X86Reg<"dr0", 0>; +def DR1 : X86Reg<"dr1", 1>; +def DR2 : X86Reg<"dr2", 2>; +def DR3 : X86Reg<"dr3", 3>; +def DR4 : X86Reg<"dr4", 4>; +def DR5 : X86Reg<"dr5", 5>; +def DR6 : X86Reg<"dr6", 6>; +def DR7 : X86Reg<"dr7", 7>; + +// Control registers +def CR0 : X86Reg<"cr0", 0>; +def CR1 : X86Reg<"cr1", 1>; +def CR2 : X86Reg<"cr2", 2>; +def CR3 : X86Reg<"cr3", 3>; +def CR4 : X86Reg<"cr4", 4>; +def CR5 : X86Reg<"cr5", 5>; +def CR6 : X86Reg<"cr6", 6>; +def CR7 : X86Reg<"cr7", 7>; +def CR8 : X86Reg<"cr8", 8>; +def CR9 : X86Reg<"cr9", 9>; +def CR10 : X86Reg<"cr10", 10>; +def CR11 : X86Reg<"cr11", 11>; +def CR12 : X86Reg<"cr12", 12>; +def CR13 : X86Reg<"cr13", 13>; +def CR14 : X86Reg<"cr14", 14>; +def CR15 : X86Reg<"cr15", 15>; + +// Pseudo index registers +def EIZ : X86Reg<"eiz", 4>; +def RIZ : X86Reg<"riz", 4>; + //===----------------------------------------------------------------------===// // Register Class Definitions... now that we have all of the pieces, define the diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp index 00edcbc..723e50c 100644 --- a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -54,7 +54,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, if (const char *bzeroEntry = V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { EVT IntPtr = TLI.getPointerTy(); - Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp index c2db11a..d1ed680 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp @@ -163,17 +163,6 @@ bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const { return isTargetELF() || TM.getRelocationModel() == Reloc::Static; } -/// getSpecialAddressLatency - For targets where it is beneficial to -/// backschedule instructions that compute addresses, return a value -/// indicating the number of scheduling cycles of backscheduling that -/// should be attempted. -unsigned X86Subtarget::getSpecialAddressLatency() const { - // For x86 out-of-order targets, back-schedule address computations so - // that loads and stores aren't blocked. - // This value was chosen arbitrarily. 
- return 200; -} - void X86Subtarget::AutoDetectSubtargetFeatures() { unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; unsigned MaxLevel; @@ -313,6 +302,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { HasBMI2 = true; ToggleFeature(X86::FeatureBMI2); } + if (IsIntel && ((EBX >> 11) & 0x1)) { + HasRTM = true; + ToggleFeature(X86::FeatureRTM); + } } } } @@ -341,11 +334,13 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, , HasLZCNT(false) , HasBMI(false) , HasBMI2(false) + , HasRTM(false) , IsBTMemSlow(false) , IsUAMemFast(false) , HasVectorUAMem(false) , HasCmpxchg16b(false) , UseLeaForSP(false) + , HasSlowDivide(false) , PostRAScheduler(false) , stackAlignment(4) // FIXME: this is a known good value for Yonah. How about others? @@ -400,6 +395,10 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, } } + // CPUName may have been set by the CPU detection code. Make sure the + // new MCSchedModel is used. + InitMCProcessorInfo(CPUName, FS); + if (X86ProcFamily == IntelAtom) PostRAScheduler = true; @@ -416,8 +415,8 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, assert((!In64BitMode || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); - // Stack alignment is 16 bytes on Darwin, Linux and Solaris (both 32 and 64 - // bit) and for all 64-bit targets. + // Stack alignment is 16 bytes on Darwin, Linux and Solaris (both + // 32 and 64 bit) and for all 64-bit targets. if (StackAlignOverride) stackAlignment = StackAlignOverride; else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() || diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h index 6841c5b..8bf4cc7 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.h +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h @@ -118,6 +118,9 @@ protected: /// HasBMI2 - Processor has BMI2 instructions. bool HasBMI2; + /// HasRTM - Processor has RTM instructions. + bool HasRTM; + /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; @@ -136,6 +139,10 @@ protected: /// the stack pointer. This is an optimization for Intel Atom processors. bool UseLeaForSP; + /// HasSlowDivide - True if smaller divides are significantly faster than + /// full divides and should be used when possible. + bool HasSlowDivide; + /// PostRAScheduler - True if using post-register-allocation scheduler. bool PostRAScheduler; @@ -205,7 +212,8 @@ public: bool hasAES() const { return HasAES; } bool hasPCLMUL() const { return HasPCLMUL; } bool hasFMA() const { return HasFMA; } - bool hasFMA4() const { return HasFMA4; } + // FIXME: Favor FMA when both are enabled. Is this the right thing to do? 
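For reference, the feature-detection hunk above reads the structured extended feature flags: CPUID leaf 7, sub-leaf 0, reports RTM in bit 11 of EBX. A standalone sketch of the same probe (assuming a GCC/Clang-style <cpuid.h>):

    #include <cpuid.h>

    static bool cpuHasRTM() {
      unsigned EAX, EBX, ECX, EDX;
      // Leaf 0 reports the highest supported CPUID leaf in EAX.
      if (!__get_cpuid(0, &EAX, &EBX, &ECX, &EDX) || EAX < 7)
        return false;
      // Structured extended feature flags: leaf 7, sub-leaf 0.
      __cpuid_count(7, 0, EAX, EBX, ECX, EDX);
      return (EBX >> 11) & 1;               // EBX bit 11 == RTM
    }

    int main() { return cpuHasRTM() ? 0 : 1; }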
+ bool hasFMA4() const { return HasFMA4 && !HasFMA; } bool hasXOP() const { return HasXOP; } bool hasMOVBE() const { return HasMOVBE; } bool hasRDRAND() const { return HasRDRAND; } @@ -214,11 +222,13 @@ public: bool hasLZCNT() const { return HasLZCNT; } bool hasBMI() const { return HasBMI; } bool hasBMI2() const { return HasBMI2; } + bool hasRTM() const { return HasRTM; } bool isBTMemSlow() const { return IsBTMemSlow; } bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool hasVectorUAMem() const { return HasVectorUAMem; } bool hasCmpxchg16b() const { return HasCmpxchg16b; } bool useLeaForSP() const { return UseLeaForSP; } + bool hasSlowDivide() const { return HasSlowDivide; } bool isAtom() const { return X86ProcFamily == IntelAtom; } @@ -231,10 +241,10 @@ public: bool isTargetSolaris() const { return TargetTriple.getOS() == Triple::Solaris; } - - // ELF is a reasonably sane default and the only other X86 targets we - // support are Darwin and Windows. Just use "not those". - bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } + bool isTargetELF() const { + return (TargetTriple.getEnvironment() == Triple::ELF || + TargetTriple.isOSBinFormatELF()); + } bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; } bool isTargetNaCl() const { return TargetTriple.getOS() == Triple::NativeClient; @@ -245,7 +255,10 @@ public: bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; } bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; } bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); } - bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } + bool isTargetCOFF() const { + return (TargetTriple.getEnvironment() != Triple::ELF && + TargetTriple.isOSBinFormatCOFF()); + } bool isTargetEnvMacho() const { return TargetTriple.isEnvironmentMachO(); } bool isTargetWin64() const { @@ -296,12 +309,6 @@ public: /// returns null. const char *getBZeroEntry() const; - /// getSpecialAddressLatency - For targets where it is beneficial to - /// backschedule instructions that compute addresses, return a value - /// indicating the number of scheduling cycles of backscheduling that - /// should be attempted. - unsigned getSpecialAddressLatency() const; - /// enablePostRAScheduler - run for Atom optimization. bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp index b7ba568..158f9dc 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -36,7 +36,7 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false), - DataLayout(getSubtargetImpl()->isTargetDarwin() ? + DL(getSubtargetImpl()->isTargetDarwin() ? 
"e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-" "n8:16:32-S128" : (getSubtargetImpl()->isTargetCygMing() || @@ -48,7 +48,8 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, InstrInfo(*this), TSInfo(*this), TLInfo(*this), - JITInfo(*this) { + JITInfo(*this), + STTI(&TLInfo), VTTI(&TLInfo) { } void X86_64TargetMachine::anchor() { } @@ -59,12 +60,13 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true), - DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" + DL("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" "n8:16:32:64-S128"), InstrInfo(*this), TSInfo(*this), TLInfo(*this), - JITInfo(*this) { + JITInfo(*this), + STTI(&TLInfo), VTTI(&TLInfo){ } /// X86TargetMachine ctor - Create an X86 target. @@ -78,7 +80,6 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, Options.StackAlignmentOverride, is64Bit), FrameLowering(*this, Subtarget), - ELFWriterInfo(is64Bit, true), InstrItins(Subtarget.getInstrItineraryData()){ // Determine the PICStyle based on the target selected. if (getRelocationModel() == Reloc::Static) { @@ -113,6 +114,12 @@ UseVZeroUpper("x86-use-vzeroupper", cl::desc("Minimize AVX to SSE transition penalty"), cl::init(true)); +// Temporary option to control early if-conversion for x86 while adding machine +// models. +static cl::opt<bool> +X86EarlyIfConv("x86-early-ifcvt", + cl::desc("Enable early if-conversion on X86")); + //===----------------------------------------------------------------------===// // Pass Pipeline Configuration //===----------------------------------------------------------------------===// @@ -142,7 +149,7 @@ public: TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) { X86PassConfig *PC = new X86PassConfig(this, PM); - if (Subtarget.hasCMov()) + if (X86EarlyIfConv && Subtarget.hasCMov()) PC->enablePass(&EarlyIfConverterID); return PC; diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm/lib/Target/X86/X86TargetMachine.h index 8e935af..12311a1 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetMachine.h +++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.h @@ -15,7 +15,6 @@ #define X86TARGETMACHINE_H #include "X86.h" -#include "X86ELFWriterInfo.h" #include "X86InstrInfo.h" #include "X86ISelLowering.h" #include "X86FrameLowering.h" @@ -23,8 +22,9 @@ #include "X86SelectionDAGInfo.h" #include "X86Subtarget.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -33,7 +33,6 @@ class StringRef; class X86TargetMachine : public LLVMTargetMachine { X86Subtarget Subtarget; X86FrameLowering FrameLowering; - X86ELFWriterInfo ELFWriterInfo; InstrItineraryData InstrItins; public: @@ -62,9 +61,6 @@ public: virtual const X86RegisterInfo *getRegisterInfo() const { return &getInstrInfo()->getRegisterInfo(); } - virtual const X86ELFWriterInfo *getELFWriterInfo() const { - return Subtarget.isTargetELF() ? 
&ELFWriterInfo : 0; - } virtual const InstrItineraryData *getInstrItineraryData() const { return &InstrItins; } @@ -80,17 +76,19 @@ public: /// class X86_32TargetMachine : public X86TargetMachine { virtual void anchor(); - const TargetData DataLayout; // Calculates type size & alignment + const DataLayout DL; // Calculates type size & alignment X86InstrInfo InstrInfo; X86SelectionDAGInfo TSInfo; X86TargetLowering TLInfo; X86JITInfo JITInfo; + ScalarTargetTransformImpl STTI; + X86VectorTargetTransformInfo VTTI; public: X86_32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); - virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual const DataLayout *getDataLayout() const { return &DL; } virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; } @@ -103,23 +101,31 @@ public: virtual X86JITInfo *getJITInfo() { return &JITInfo; } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } }; /// X86_64TargetMachine - X86 64-bit target machine. /// class X86_64TargetMachine : public X86TargetMachine { virtual void anchor(); - const TargetData DataLayout; // Calculates type size & alignment + const DataLayout DL; // Calculates type size & alignment X86InstrInfo InstrInfo; X86SelectionDAGInfo TSInfo; X86TargetLowering TLInfo; X86JITInfo JITInfo; + ScalarTargetTransformImpl STTI; + X86VectorTargetTransformInfo VTTI; public: X86_64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); - virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual const DataLayout *getDataLayout() const { return &DL; } virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; } @@ -132,6 +138,12 @@ public: virtual X86JITInfo *getJITInfo() { return &JITInfo; } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp b/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp index 80b75dc..c4a5887 100644 --- a/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp +++ b/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp @@ -42,7 +42,6 @@ namespace { private: const TargetInstrInfo *TII; // Machine instruction info. - MachineBasicBlock *MBB; // Current basic block // Any YMM register live-in to this function? 
bool FnHasLiveInYmm; @@ -84,7 +83,7 @@ namespace { // 2) All states must be clean for the result to be clean // 3) If none above and one unknown, the result state is also unknown // - unsigned computeState(unsigned PrevState, unsigned CurState) { + static unsigned computeState(unsigned PrevState, unsigned CurState) { if (PrevState == ST_INIT) return CurState; @@ -122,7 +121,7 @@ static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) { } static bool hasYmmReg(MachineInstr *MI) { - for (int i = 0, e = MI->getNumOperands(); i != e; ++i) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; @@ -148,7 +147,7 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { const TargetRegisterClass *RC = &X86::VR256RegClass; for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e; i++) { - if (MRI.isPhysRegUsed(*i)) { + if (!MRI.reg_nodbg_empty(*i)) { YMMUsed = true; break; } @@ -189,7 +188,6 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { bool Changed = false; unsigned BBNum = BB.getNumber(); - MBB = &BB; // Don't process already solved BBs if (BBSolved[BBNum]) @@ -207,7 +205,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, // The entry MBB for the function may set the initial state to dirty if // the function receives any YMM incoming arguments - if (MBB == MF.begin()) { + if (&BB == MF.begin()) { EntryState = ST_CLEAN; if (FnHasLiveInYmm) EntryState = ST_DIRTY; @@ -253,7 +251,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, // When unknown, only compute the information within the block to have // it available in the exit if possible, but don't change the block. if (EntryState != ST_UNKNOWN) { - BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER)); + BuildMI(BB, I, dl, TII->get(X86::VZEROUPPER)); ++NumVZU; } diff --git a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp index c76866f..caae562 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -31,7 +31,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -112,7 +112,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { EmitSpecialLLVMGlobal(GV)) return; - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GV, Mang,TM)); diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp index a4e5647..e18d973 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp @@ -23,7 +23,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/ErrorHandling.h" @@ -98,12 +98,13 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const { DebugLoc dl = MBBI != MBB.end() ? 
MBBI->getDebugLoc() : DebugLoc(); bool FP = hasFP(MF); - bool Nested = MF.getFunction()-> - getAttributes().hasAttrSomewhere(Attribute::Nest); + const AttrListPtr &PAL = MF.getFunction()->getAttributes(); - if (Nested) { - loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII); - } + for (unsigned I = 0, E = PAL.getNumAttrs(); I != E; ++I) + if (PAL.getAttributesAtIndex(I).hasAttribute(Attributes::Nest)) { + loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII); + break; + } // Work out frame sizes. int FrameSize = MFI->getStackSize(); diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp index 8643ffc..9e7816e 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp @@ -285,7 +285,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const llvm_unreachable(0); } SDValue base = getGlobalAddressWrapper(GA, GV, DAG); - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); unsigned Size = TD->getTypeAllocSize(Ty); SDValue offset = DAG.getNode(ISD::MUL, dl, MVT::i32, BuildGetId(DAG, dl), DAG.getConstant(Size, MVT::i32)); @@ -298,7 +298,7 @@ LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const DebugLoc DL = Op.getDebugLoc(); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true); + SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy()); return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, getPointerTy(), Result); } @@ -405,7 +405,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { if (allowsUnalignedMemoryAccesses(LD->getMemoryVT())) return SDValue(); - unsigned ABIAlignment = getTargetData()-> + unsigned ABIAlignment = getDataLayout()-> getABITypeAlignment(LD->getMemoryVT().getTypeForEVT(*DAG.getContext())); // Leave aligned load alone. if (LD->getAlignment() >= ABIAlignment) @@ -477,7 +477,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } // Lower to a call to __misaligned_load(BasePtr). - Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -507,7 +507,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const if (allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { return SDValue(); } - unsigned ABIAlignment = getTargetData()-> + unsigned ABIAlignment = getDataLayout()-> getABITypeAlignment(ST->getMemoryVT().getTypeForEVT(*DAG.getContext())); // Leave aligned store alone. if (ST->getAlignment() >= ABIAlignment) { @@ -536,7 +536,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const } // Lower to a call to __misaligned_store(BasePtr, Value). 
- Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -1499,7 +1499,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, if (StoreBits % 8) { break; } - unsigned ABIAlignment = getTargetData()->getABITypeAlignment( + unsigned ABIAlignment = getDataLayout()->getABITypeAlignment( ST->getMemoryVT().getTypeForEVT(*DCI.DAG.getContext())); unsigned Alignment = ST->getAlignment(); if (Alignment >= ABIAlignment) { @@ -1570,7 +1570,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, if (Ty->getTypeID() == Type::VoidTyID) return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs); - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); unsigned Size = TD->getTypeAllocSize(Ty); if (AM.BaseGV) { return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 && diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td index ae646a2..3e7666b 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td +++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td @@ -33,7 +33,7 @@ def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink, SDNPVariadic]>; def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPMayLoad]>; def SDT_XCoreBR_JT : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; @@ -58,7 +58,7 @@ def cprelwrapper : SDNode<"XCoreISD::CPRelativeWrapper", SDT_XCoreAddress, def SDT_XCoreStwsp : SDTypeProfile<0, 2, [SDTCisInt<1>]>; def XCoreStwsp : SDNode<"XCoreISD::STWSP", SDT_XCoreStwsp, - [SDNPHasChain]>; + [SDNPHasChain, SDNPMayStore]>; // These are target-independent nodes, but have target-specific formats. 
def SDT_XCoreCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp index cdd0a08..be5855a 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -176,7 +176,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, #ifndef NDEBUG DEBUG(errs() << "\nFunction : " - << MF.getFunction()->getName() << "\n"); + << MF.getName() << "\n"); DEBUG(errs() << "<--------->\n"); DEBUG(MI.print(errs())); DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"); diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp index 11ec86b..d5a932c 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp @@ -27,12 +27,12 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS), - DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-" + DL("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-" "i16:16:32-i32:32:32-i64:32:32-n32"), InstrInfo(), FrameLowering(Subtarget), TLInfo(*this), - TSInfo(*this) { + TSInfo(*this), STTI(&TLInfo), VTTI(&TLInfo) { } namespace { diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h index 2546681..c60c6a3 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h +++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h @@ -20,17 +20,20 @@ #include "XCoreISelLowering.h" #include "XCoreSelectionDAGInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetTransformImpl.h" +#include "llvm/DataLayout.h" namespace llvm { class XCoreTargetMachine : public LLVMTargetMachine { XCoreSubtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment + const DataLayout DL; // Calculates type size & alignment XCoreInstrInfo InstrInfo; XCoreFrameLowering FrameLowering; XCoreTargetLowering TLInfo; XCoreSelectionDAGInfo TSInfo; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; public: XCoreTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -53,7 +56,13 @@ public: virtual const TargetRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } + virtual const DataLayout *getDataLayout() const { return &DL; } // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
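One theme runs through nearly every file touched above: the header llvm/Target/TargetData.h becomes llvm/DataLayout.h, TargetData members become DataLayout (typically renamed DL), and getTargetData() call sites become getDataLayout(), while the query surface (getTypeAllocSize, getABITypeAlignment, getIntPtrType) is unchanged. A toy mock of the new call-site shape (the Mock* names are hypothetical, not LLVM API):

    #include <cstdint>

    // Hypothetical stand-ins mirroring the renamed LLVM interfaces; they are
    // not LLVM API, only the call-site shape after this change.
    struct MockDataLayout {
      uint64_t getTypeAllocSize(unsigned TypeBits) const { return (TypeBits + 7) / 8; }
    };

    struct MockTargetMachine {
      MockDataLayout DL;
      // Was getTargetData(), returning the old TargetData class.
      const MockDataLayout *getDataLayout() const { return &DL; }
    };

    int main() {
      MockTargetMachine TM;
      return TM.getDataLayout()->getTypeAllocSize(64) == 8 ? 0 : 1;
    }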